Production over Station and Season
ggplot(dplyr::filter(metadata, year == "2015" & fraction %in% c("Free", "Particle") &
norep_filter_name != "MOTEJ515"),
aes(x = lakesite, y = log10(as.numeric(fraction_bac_abund)), fill = fraction)) +
geom_bar(stat = "identity", color = "black", position=position_dodge()) +
facet_grid(. ~ season) +
scale_y_continuous(expand = c(0,0)) +
scale_fill_manual(values = fraction_colors) +
guides(fill=guide_legend(nrow=1,byrow=TRUE)) +
xlab("Lake Station") + ylab("Log10(Bacterial Cells/mL)") +
theme(axis.text.x = element_text(angle = 30, vjust = 1, hjust = 1),
legend.position = "bottom", legend.title = element_blank(),
plot.margin = unit(c(0.25, 0.25, 0.25, 0.5), "cm")) #top, right, bottom, and left margins)

prod1 <- ggplot(dplyr::filter(metadata, year == "2015" & fraction %in% c("Free", "Particle")),
aes(x = lakesite, y = frac_bacprod, fill = fraction)) +
geom_bar(stat = "identity", color = "black", position=position_dodge()) +
geom_errorbar(aes(ymin = frac_bacprod - SD_frac_bacprod, ymax = frac_bacprod + SD_frac_bacprod),
position = position_dodge(width = 0.75), width = 0.25) +
facet_grid(. ~ season) +
scale_y_continuous(expand = c(0,0), limits = c(0, 80),
breaks = seq(from = 0, to = 80, by = 15)) +
scale_fill_manual(values = fraction_colors) +
guides(fill=guide_legend(nrow=1,byrow=TRUE)) +
xlab("Lake Station") + ylab("Community Production\n(ug C/L/Hr)") +
theme(axis.text.x = element_blank(), #element_text(angle = 30, vjust = 1, hjust = 1),
axis.title.x = element_blank(),
legend.position = c(0.85, 0.9), legend.title = element_blank(),
plot.margin = unit(c(0.25, 0.25, 0.25, 0.25), "cm")) #top, right, bottom, and left margins)
prod2 <- ggplot(dplyr::filter(metadata, year == "2015" & fraction %in% c("Free", "Particle")),
aes(x = lakesite, y = fracprod_per_cell_noinf, fill = fraction)) +
geom_bar(stat = "identity", color = "black", position=position_dodge()) +
facet_grid(. ~ season) +
scale_y_continuous(expand = c(0,0)) +
scale_fill_manual(values = fraction_colors) +
guides(fill=guide_legend(nrow=1,byrow=TRUE)) +
xlab("Lake Station") + ylab("Cellular Production \n(ug C/cell/Hr)") +
theme(axis.text.x = element_text(angle = 30, vjust = 1, hjust = 1),
legend.position = c(0.85, 0.9), legend.title = element_blank(),
plot.margin = unit(c(0.25, 0.25, 0.25, 0.5), "cm")) #top, right, bottom, and left margins)
########## LONG FORMAT OF TOTAL PRODUCTIVITY
ggplot(dplyr::filter(metadata, year == "2015" & fraction == "Free"),
aes(x = lakesite, y = tot_bacprod)) +
geom_bar(stat = "identity", color = "black", fill = "grey", position=position_dodge()) +
geom_errorbar(aes(ymin = tot_bacprod - SD_tot_bacprod, ymax = tot_bacprod + SD_tot_bacprod),width = 0.25) +
facet_grid(season ~., scales = "free") +
scale_y_continuous(expand = c(0,0), limits = c(0, 80),
breaks = seq(from = 0, to = 80, by = 15)) +
scale_fill_manual(values = fraction_colors) +
guides(fill=guide_legend(nrow=1,byrow=TRUE)) +
xlab("Lake Station") + ylab("Community Production\n(ug C/L/Hr)") +
theme(legend.position = c(0.85, 0.9), legend.title = element_blank(),
plot.margin = unit(c(0.25, 0.25, 0.25, 0.25), "cm")) #top, right, bottom, and left margins)
## Warning: Removed 1 rows containing missing values (geom_bar).
## Warning: Removed 1 rows containing missing values (geom_errorbar).

########## LONG FORMAT OF COMMUNITY PRODUCTIVITY
community_prod_plot_long <-
ggplot(dplyr::filter(metadata, year == "2015" & fraction %in% c("Free", "Particle")),
aes(x = lakesite, y = frac_bacprod, fill = fraction)) +
geom_bar(stat = "identity", color = "black", position=position_dodge()) +
geom_errorbar(aes(ymin = frac_bacprod - SD_frac_bacprod, ymax = frac_bacprod + SD_frac_bacprod),
position = position_dodge(width = 0.75), width = 0.25) +
facet_grid(season ~., scales = "free") +
scale_y_continuous(expand = c(0,0), limits = c(0, 80),
breaks = seq(from = 0, to = 80, by = 15)) +
scale_fill_manual(values = fraction_colors) +
guides(fill=guide_legend(nrow=1,byrow=TRUE)) +
xlab("Lake Station") + ylab("Community Production\n(ug C/L/Hr)") +
theme(legend.position = "bottom", legend.title = element_blank(),
plot.margin = unit(c(0.25, 0.25, 0.25, 0.25), "cm")) #top, right, bottom, and left margins)
########## LONG FORMAT OF CELLULAR PRODUCTIVITY
percapita_prod_plot_long <-
ggplot(dplyr::filter(metadata, year == "2015" & fraction %in% c("Free", "Particle")),
aes(x = lakesite, y = fracprod_per_cell_noinf, fill = fraction)) +
geom_bar(stat = "identity", color = "black", position=position_dodge()) +
facet_grid(season ~ ., scales = "free_x") +
scale_y_continuous(expand = c(0,0)) +
scale_fill_manual(values = fraction_colors) +
guides(fill=guide_legend(nrow=1,byrow=TRUE)) +
xlab("Lake Station") + ylab("Cellular Production \n(ug C/cell/Hr)") +
theme(axis.text.x = element_text(angle = 30, vjust = 1, hjust = 1),
legend.position = "bottom", legend.title = element_blank(),
plot.margin = unit(c(0.25, 0.25, 0.25, 0.5), "cm")) #top, right, bottom, and left margins)
plot_grid(community_prod_plot_long, percapita_prod_plot_long,
labels = c("A", "B"),
nrow = 1, ncol = 2,
align = "v")
## Warning: Removed 1 rows containing missing values (geom_bar).

plot_grid(prod1, prod2, labels = c("A", "B"),
rel_heights = c(1, 1.3),
nrow = 2, ncol = 1,
align = "v")
## Warning: Removed 1 rows containing missing values (geom_bar).

### BY STATION
prod_by_station <- ggplot(dplyr::filter(metadata, year == "2015" & fraction %in% c("Free")),
aes(x = lakesite, y = tot_bacprod)) +
geom_boxplot(fill = "grey") + geom_point(size = 3.5, position = position_jitter()) +
scale_y_continuous(expand = c(0,0), limits = c(0,100)) +
scale_fill_manual(values = fraction_colors) +
guides(fill=guide_legend(nrow=1,byrow=TRUE)) +
xlab("Lake Station") + ylab("Total Production (ug C/cell/Hr)") +
theme(axis.text.x = element_text(angle = 30, vjust = 1, hjust = 1),
legend.position = c(0.85, 0.9), legend.title = element_blank(),
plot.margin = unit(c(0.25, 0.25, 0.25, 0.5), "cm")) #top, right, bottom, and left margins)
prod_by_season <- ggplot(dplyr::filter(metadata, year == "2015" & fraction %in% c("Free")),
aes(x = season, y = tot_bacprod)) +
geom_boxplot(fill = "grey") + geom_point(size = 3.5, position = position_jitter()) +
scale_y_continuous(expand = c(0,0), limits = c(0,100)) +
scale_fill_manual(values = fraction_colors) +
guides(fill=guide_legend(nrow=1,byrow=TRUE)) +
xlab("Season") +
theme(axis.text.x = element_text(angle = 30, vjust = 1, hjust = 1),
axis.title.y = element_blank(),
legend.position = c(0.85, 0.9), legend.title = element_blank(),
plot.margin = unit(c(0.25, 0.25, 0.25, 0.5), "cm")) #top, right, bottom, and left margins)
plot_grid(prod_by_station, prod_by_season, labels = c("A", "B"),
rel_widths = c(1, 0.75),
nrow = 1, ncol = 2,
align = "h")

scaled_surface_PAFLA_pruned_rm2 <- scale_reads(surface_PAFL_otu_pruned_notree_rm2)
set.seed(777)
# Calculate the SOREN distance
soren_whole_dist <- ordinate(
physeq = scaled_surface_PAFLA_pruned_rm2,
method = "PCoA",
distance = "bray",
binary = TRUE) # Make it presence/absence
p1 <- plot_ordination(
physeq = scaled_surface_PAFLA_pruned_rm2,
ordination = soren_whole_dist,
color = "fraction",
shape = "lakesite",
title = "Sørensen") +
geom_point(size=5, alpha = 0.7, aes(fill =fraction, color = fraction)) +
scale_colour_manual(values = fraction_colors) +
scale_fill_manual(values = fraction_colors) +
scale_shape_manual(values = c(21, 22, 23, 24)) +
theme(legend.position = "none")
# Calculate the BRAY-CURTIS distance
bray_whole_dist <- ordinate(
physeq = scaled_surface_PAFLA_pruned_rm2,
method = "PCoA",
distance = "bray",
binary = FALSE) # Make it Abundance weighted
p2 <- plot_ordination(
physeq = scaled_surface_PAFLA_pruned_rm2,
ordination = bray_whole_dist ,
color = "fraction",
shape = "lakesite",
title = "Bray-Curtis") +
geom_point(size=5, alpha = 0.7, aes(fill =fraction, color = fraction)) +
scale_colour_manual(values = fraction_colors) +
scale_fill_manual(values = fraction_colors) +
scale_shape_manual(values = c(21, 22, 23, 24)) +
theme(legend.position = "right", legend.title = element_blank())
plot_grid(p1, p2, labels = c("A", "B"), align = "h", ncol = 2,
rel_widths = c(0.8, 1.2))

# Calculate bray curtis distance matrix
musk_bray <- phyloseq::distance(scaled_surface_PAFLA_pruned_rm2, method = "bray")
# make a data frame from the sample_data
sampledf <- data.frame(sample_data(scaled_surface_PAFLA_pruned_rm2))
# Adonis test
adonis(musk_bray ~ fraction + season , data = sampledf)
##
## Call:
## adonis(formula = musk_bray ~ fraction + season, data = sampledf)
##
## Permutation: free
## Number of permutations: 999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## fraction 1 1.3903 1.39032 16.2279 0.30094 0.001 ***
## season 2 1.5161 0.75804 8.8479 0.32816 0.001 ***
## Residuals 20 1.7135 0.08567 0.37089
## Total 23 4.6199 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Linear Models with Environmental Variables
pca_scores_df <- summary(pca_environ)$sites %>%
as.data.frame() %>%
tibble::rownames_to_column(var = "norep_filter_name") %>%
mutate(norep_water_name = paste(substr(norep_filter_name, 1, 4), substr(norep_filter_name, 6, 8), sep = "")) %>%
dplyr::select(-norep_filter_name)
metadata_pca <- metadata %>%
mutate(norep_water_name = paste(substr(norep_filter_name, 1, 4), substr(norep_filter_name, 6, 8), sep = "")) %>%
left_join(pca_scores_df, by = "norep_water_name")
##### SINGLE REGRESSION with PC1
## Free living
comm_lm_PC1_free <- summary(lm(frac_bacprod ~ PC1,
data = filter(metadata_pca, fraction == "Free")))
## Particle-associated
comm_lm_PC1_part <- summary(lm(frac_bacprod ~ PC1,
data = filter(metadata_pca, fraction == "Particle")))
## PER CAPITA: Free living
percap_lm_PC1_free <- summary(lm(log10(fracprod_per_cell_noinf) ~ PC1,
data = filter(metadata_pca, fraction == "Free")))
## PER CAPITA: Particle-associated
percap_lm_PC1_part <- summary(lm(log10(fracprod_per_cell_noinf) ~ PC1,
data = filter(metadata_pca, fraction == "Particle")))
# Put the results all together
pca_lm_row1 <- c("Free", "Per-Liter","PC1",
round(comm_lm_PC1_free$adj.r.squared, digits = 3),
round(comm_lm_PC1_free$coefficients[2,4], digits = 3))
pca_lm_row2 <- c("Particle", "Per-Liter","PC1",
round(comm_lm_PC1_part$adj.r.squared, digits = 3),
round(comm_lm_PC1_part$coefficients[2,4], digits = 3))
pca_lm_row3 <- c("Free", "Per-Capita","PC1",
round(percap_lm_PC1_free$adj.r.squared, digits = 3),
round(percap_lm_PC1_free$coefficients[2,4], digits = 3))
pca_lm_row4 <- c("Particle", "Per-Capita", "PC1",
round(percap_lm_PC1_part$adj.r.squared, digits = 3),
round(percap_lm_PC1_part$coefficients[2,4], digits = 3))
pca_lm_results_df <-
rbind(pca_lm_row1, pca_lm_row2, pca_lm_row3, pca_lm_row4)
colnames(pca_lm_results_df) <- c("fraction", "Predictor","Prod_measure", "Adjusted_R2", "p-value")
row.names(pca_lm_results_df) = NULL
pander(pca_lm_results_df,
caption = "Single Linear regression statistics for PC1.")
Single Linear regression statistics for PC1.
| Free |
Per-Liter |
PC1 |
0.06 |
0.221 |
| Particle |
Per-Liter |
PC1 |
-0.075 |
0.641 |
| Free |
Per-Capita |
PC1 |
0.014 |
0.307 |
| Particle |
Per-Capita |
PC1 |
0.006 |
0.33 |
##### SINGLE REGRESSION with PC2
## Free living
comm_lm_PC2_free <- summary(lm(frac_bacprod ~ PC2,
data = filter(metadata_pca, fraction == "Free")))
## Particle-associated
comm_lm_PC2_part <- summary(lm(frac_bacprod ~ PC2,
data = filter(metadata_pca, fraction == "Particle")))
## PER CAPITA: Free living
percap_lm_PC2_free <- summary(lm(log10(fracprod_per_cell_noinf) ~ PC2,
data = filter(metadata_pca, fraction == "Free")))
## PER CAPITA: Particle-associated
percap_lm_PC2_part <- summary(lm(log10(fracprod_per_cell_noinf) ~ PC2,
data = filter(metadata_pca, fraction == "Particle")))
# Put the results all together
pca_lm2_row1 <- c("Free", "Per-Liter","PC2",
round(comm_lm_PC2_free$adj.r.squared, digits = 3),
round(comm_lm_PC2_free$coefficients[2,4], digits = 3))
pca_lm2_row2 <- c("Particle", "Per-Liter","PC2",
round(comm_lm_PC2_part$adj.r.squared, digits = 3),
round(comm_lm_PC2_part$coefficients[2,4], digits = 3))
pca_lm2_row3 <- c("Free", "Per-Capita","PC2",
round(percap_lm_PC2_free$adj.r.squared, digits = 3),
round(percap_lm_PC2_free$coefficients[2,4], digits = 3))
pca_lm2_row4 <- c("Particle", "Per-Capita", "PC2",
round(percap_lm_PC2_part$adj.r.squared, digits = 3),
round(percap_lm_PC2_part$coefficients[2,4], digits = 3))
pca_lm2_results_df <-
rbind(pca_lm2_row1, pca_lm2_row2, pca_lm2_row3, pca_lm2_row4)
colnames(pca_lm2_results_df) <- c("fraction", "Predictor","Prod_measure", "Adjusted_R2", "p-value")
row.names(pca_lm2_results_df) = NULL
pander(pca_lm2_results_df,
caption = "Single Linear regression statistics for PC2.")
Single Linear regression statistics for PC2.
| Free |
Per-Liter |
PC2 |
-0.056 |
0.531 |
| Particle |
Per-Liter |
PC2 |
0.207 |
0.077 |
| Free |
Per-Capita |
PC2 |
0.033 |
0.268 |
| Particle |
Per-Capita |
PC2 |
0.393 |
0.023 |
##### MULTIPLE REGRESSION
## Free living
summary(lm(frac_bacprod ~ PC1 + PC2,
data = filter(metadata_pca, fraction == "Free")))
##
## Call:
## lm(formula = frac_bacprod ~ PC1 + PC2, data = filter(metadata_pca,
## fraction == "Free"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -15.792 -10.435 -7.571 12.769 30.622
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 24.058 5.049 4.765 0.00102 **
## PC1 6.185 4.880 1.268 0.23676
## PC2 -3.262 4.880 -0.669 0.52058
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 17.49 on 9 degrees of freedom
## Multiple R-squared: 0.1858, Adjusted R-squared: 0.004856
## F-statistic: 1.027 on 2 and 9 DF, p-value: 0.3966
## Particle-associated
summary(lm(frac_bacprod ~ PC1 + PC2,
data = filter(metadata_pca, fraction == "Particle")))
##
## Call:
## lm(formula = frac_bacprod ~ PC1 + PC2, data = filter(metadata_pca,
## fraction == "Particle"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -12.0696 -4.7286 -0.1293 3.1707 15.5457
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 9.958 2.198 4.531 0.00142 **
## PC1 1.147 2.124 0.540 0.60246
## PC2 -4.032 2.124 -1.898 0.09014 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7.613 on 9 degrees of freedom
## Multiple R-squared: 0.302, Adjusted R-squared: 0.1469
## F-statistic: 1.947 on 2 and 9 DF, p-value: 0.1983
## PER CAPITA: Free living
summary(lm(log10(fracprod_per_cell_noinf) ~ PC1 + PC2,
data = filter(metadata_pca, fraction == "Free")))
##
## Call:
## lm(formula = log10(fracprod_per_cell_noinf) ~ PC1 + PC2, data = filter(metadata_pca,
## fraction == "Free"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.5522 -0.2055 -0.1250 0.2276 0.5222
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -7.5750 0.1109 -68.318 1.56e-13 ***
## PC1 0.1176 0.1072 1.097 0.301
## PC2 -0.1269 0.1072 -1.184 0.267
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3841 on 9 degrees of freedom
## Multiple R-squared: 0.2245, Adjusted R-squared: 0.05219
## F-statistic: 1.303 on 2 and 9 DF, p-value: 0.3185
## PER CAPITA: Particle-associated
summary(lm(log10(fracprod_per_cell_noinf) ~ PC1 + PC2,
data = filter(metadata_pca, fraction == "Particle")))
##
## Call:
## lm(formula = log10(fracprod_per_cell_noinf) ~ PC1 + PC2, data = filter(metadata_pca,
## fraction == "Particle"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.52230 -0.13916 -0.02677 0.17348 0.63461
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -6.6647 0.1112 -59.946 6.66e-12 ***
## PC1 0.2069 0.1081 1.913 0.0921 .
## PC2 -0.3653 0.1096 -3.332 0.0103 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3643 on 8 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.6253, Adjusted R-squared: 0.5317
## F-statistic: 6.676 on 2 and 8 DF, p-value: 0.01971
PC1_lm_plot <-
ggplot(metadata_pca, aes(x = PC1, y = log10(fracprod_per_cell_noinf),
color = fraction, fill = fraction)) +
geom_point() + ylab("log10(cellular production)") +
scale_color_manual(values = fraction_colors) +
scale_fill_manual(values = fraction_colors) +
geom_smooth(method = "lm") +
theme(legend.title = element_blank(), legend.position = "bottom")
PC2_lm_plot <-
ggplot(metadata_pca, aes(x = PC2, y = log10(fracprod_per_cell_noinf),
color = fraction, fill = fraction)) +
geom_point() + ylab("log10(cellular production)") +
scale_color_manual(values = fraction_colors) +
scale_fill_manual(values = fraction_colors) +
geom_smooth(method = "lm") +
theme(legend.title = element_blank(), legend.position = "bottom")
plot_grid(PC2_lm_plot, PC2_lm_plot,
labels = c("A", "B"),
nrow = 1, ncol = 2)
## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).

par(oma(0.1, 0.1, 0.1, 0.1))
## Error in oma(0.1, 0.1, 0.1, 0.1): could not find function "oma"
biplot(pca_environ,
xlab = paste("PC1", paste(round(summary(pca_environ)$cont$importance[2,1]*100, digits = 2), "%", sep = ""), sep = ": "),
ylab = paste("PC2", paste(round(summary(pca_environ)$cont$importance[2,2]*100, digits = 2), "%", sep = ""), sep = ": "))

Figure 1
######################################################### Fraction ABUNDANCe
frac_abund_wilcox <- wilcox.test(log10(as.numeric(fraction_bac_abund)) ~ fraction, data = metadata)
frac_abund_wilcox
##
## Wilcoxon rank sum test
##
## data: log10(as.numeric(fraction_bac_abund)) by fraction
## W = 0, p-value = 1.479e-06
## alternative hypothesis: true location shift is not equal to 0
metadata %>%
group_by(fraction) %>%
summarize(mean(as.numeric(fraction_bac_abund)))
## # A tibble: 2 x 2
## fraction `mean(as.numeric(fraction_bac_abund))`
## <fctr> <dbl>
## 1 Particle 41168.88
## 2 Free 734522.25
# Make a data frame to draw significance line in boxplot (visually calculated)
dat1 <- data.frame(a = c(1.15,1.15,1.85,1.85), b = c(6.45,6.5,6.5,6.45)) # WholePart vs WholeFree
poster_a <- ggplot(filter(metadata, norep_filter_name != "MOTEJ515"),
aes(y = log10(as.numeric(fraction_bac_abund)), x = fraction)) +
geom_jitter(size = 3, aes(fill = fraction, shape = season), width = 0.2) +
geom_boxplot(alpha = 0.5, outlier.shape = NA, aes( fill = fraction)) +
ylab("Log10(Bacterial Counts) \n (cells/mL)") +
scale_fill_manual(values = fraction_colors) +
scale_shape_manual(values = season_shapes) +
##### Particle vs free cell abundances
geom_path(data = dat1, aes(x = a, y = b), linetype = 1, color = "gray40") +
annotate("text", x=1.5, y=6.5, fontface = "bold", size = 3.5, color = "gray40",
label= paste("***\np =", round(frac_abund_wilcox$p.value, digits = 6))) +
theme(legend.position = "bottom",
legend.title = element_blank(),
axis.title.x = element_blank())
######################################################### TOTAL PRODUCTION
totprod_wilcox <- wilcox.test(frac_bacprod ~ fraction, data = metadata)
totprod_wilcox
##
## Wilcoxon rank sum test
##
## data: frac_bacprod by fraction
## W = 33, p-value = 0.02418
## alternative hypothesis: true location shift is not equal to 0
metadata %>%
group_by(fraction) %>%
summarize(mean(frac_bacprod))
## # A tibble: 2 x 2
## fraction `mean(frac_bacprod)`
## <fctr> <dbl>
## 1 Particle 9.958333
## 2 Free 24.058333
# Make a data frame to draw significance line in boxplot (visually calculated)
dat2 <- data.frame(a = c(1.15,1.15,1.85,1.85), b = c(67,68,68,67)) # WholePart vs WholeFree
poster_b <- ggplot(metadata, aes(y = frac_bacprod, x = fraction)) +
geom_jitter(size = 3, aes(fill = fraction, shape = season), width = 0.2) +
geom_boxplot(alpha = 0.5, outlier.shape = NA, aes( fill = fraction)) +
scale_fill_manual(values = fraction_colors, guide = FALSE) +
scale_shape_manual(values = season_shapes) +
ylab("Community Production \n (μgC/L/hr)") +
##### Particle vs free bulk production
geom_path(data = dat2, aes(x = a, y = b), linetype = 1, color = "gray40") +
annotate("text", x=1.5, y=68, fontface = "bold", size = 3.5, color = "gray40",
label= paste("**\np =", round(totprod_wilcox$p.value, digits = 3))) +
theme(legend.position = "none",
axis.title.x = element_blank())
######################################################### TOTAL PRODUCTION
percellprod_wilcox <- wilcox.test(log10(fracprod_per_cell) ~ fraction, data = filter(metadata, norep_filter_name != "MOTEJ515"))
percellprod_wilcox
##
## Wilcoxon rank sum test
##
## data: log10(fracprod_per_cell) by fraction
## W = 125, p-value = 6.656e-05
## alternative hypothesis: true location shift is not equal to 0
filter(metadata, norep_filter_name != "MOTEJ515") %>%
group_by(fraction) %>%
summarize(mean(fracprod_per_cell))
## # A tibble: 2 x 2
## fraction `mean(fracprod_per_cell)`
## <fctr> <dbl>
## 1 Particle 4.816116e-07
## 2 Free 3.866798e-08
# Make a data frame to draw significance line in boxplot (visually calculated)
dat3 <- data.frame(a = c(1.15,1.15,1.85,1.85), b = c(-5.05,-5,-5,-5.05)) # WholePart vs WholeFree
poster_c <- ggplot(filter(metadata, norep_filter_name != "MOTEJ515"),
aes(y = log10(fracprod_per_cell), x = fraction)) +
geom_jitter(size = 3, aes(fill = fraction, shape = season), width = 0.2) +
geom_boxplot(alpha = 0.5, outlier.shape = NA, aes( fill = fraction)) +
scale_fill_manual(values = fraction_colors, guide = FALSE) +
scale_shape_manual(values = season_shapes) +
ylim(c(-8.5, -5)) +
ylab("Log10(Cellular Production) \n(μgC/cell/hr)") +
##### Particle vs free per-cell production
geom_path(data = dat3, aes(x = a, y = b), linetype = 1, color = "gray40") +
annotate("text", x=1.5, y=-5, fontface = "bold", size = 3.5, color = "gray40",
label= paste("***\np =", round(percellprod_wilcox$p.value, digits = 5))) +
theme(legend.position = "none",
axis.title.x = element_blank())
######## FIGURE 1
# legend
legend <- get_legend(poster_a)
## Error in f(...): object 'season_shapes' not found
row1_plots <- plot_grid(poster_a + theme(legend.position = "none"), poster_b, poster_c,
labels = c("A", "B", "C"),
ncol = 3, nrow = 1)
## Error in f(...): object 'season_shapes' not found
#fig_1 <-
plot_grid(row1_plots, legend,
ncol = 1, nrow = 2,
rel_heights = c(1, 0.05))
## Error in plot_grid(row1_plots, legend, ncol = 1, nrow = 2, rel_heights = c(1, : object 'row1_plots' not found
Figure S1
work_df <- metadata %>%
dplyr::select(norep_filter_name, fraction, fraction_bac_abund, frac_bacprod, fracprod_per_cell_noinf) %>%
mutate(norep_water_name = paste(substr(norep_filter_name, 1,4), substr(norep_filter_name, 6,9), sep = "")) %>%
dplyr::select(-norep_filter_name)
part_work_df <- work_df %>%
filter(fraction == "Particle") %>%
rename(part_bacabund = fraction_bac_abund,
part_prod = frac_bacprod,
part_percell_prod = fracprod_per_cell_noinf) %>%
dplyr::select(-fraction)
free_work_df <- work_df %>%
filter(fraction == "Free") %>%
rename(free_bacabund = fraction_bac_abund,
free_prod = frac_bacprod,
free_percell_prod = fracprod_per_cell_noinf) %>%
dplyr::select(-fraction)
byfrac_df <- part_work_df %>%
left_join(free_work_df, by = "norep_water_name")
byfrac_df$season <- substr(byfrac_df$norep_water_name, 5,5) # 7th letter = month sampled
byfrac_df$season <- as.character(byfrac_df$season)
byfrac_df$season <- ifelse(byfrac_df$season == "5", "Spring",
ifelse(byfrac_df$season == "7", "Summer",
ifelse(byfrac_df$season == "9", "Fall",
"NA")))
byfrac_df$season <- factor(byfrac_df$season, levels = c("Spring", "Summer", "Fall"))
summary(lm(log10(part_bacabund) ~ log10(free_bacabund), data = filter(byfrac_df, norep_water_name != "MOTE515")))
##
## Call:
## lm(formula = log10(part_bacabund) ~ log10(free_bacabund), data = filter(byfrac_df,
## norep_water_name != "MOTE515"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.52690 -0.08048 0.05903 0.19565 0.35255
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.0900 3.2247 0.648 0.533
## log10(free_bacabund) 0.4264 0.5532 0.771 0.461
##
## Residual standard error: 0.3111 on 9 degrees of freedom
## Multiple R-squared: 0.06194, Adjusted R-squared: -0.04229
## F-statistic: 0.5943 on 1 and 9 DF, p-value: 0.4605
plot1 <- ggplot(filter(byfrac_df, norep_water_name != "MOTE515"),
aes(x = log10(free_bacabund), y = log10(part_bacabund))) +
xlab("Free") + ylab("Particle") +
ggtitle("Log10(Bacterial Counts) \n (cells/mL)") +
geom_point(size = 3, fill = "grey", aes(shape = season), width = 0.2) +
scale_shape_manual(values = season_shapes) +
theme(legend.position = "bottom",
legend.title = element_blank())
## Warning: Ignoring unknown parameters: width
lm_prod_corr <- lm(part_prod ~ free_prod, data = byfrac_df)
plot2 <- ggplot(byfrac_df, aes(x = free_prod, y = part_prod)) +
xlab("Free") + ylab("Particle") +
ggtitle("Community Production \n(μgC/L/hr)") +
geom_point(size = 3, fill = "grey", aes(shape = season), width = 0.2) +
scale_shape_manual(values = season_shapes) +
geom_smooth(method = "lm", color = "black") +
annotate("text", x =20, y= 30, color = "black", fontface = "bold", size = 4,
label = paste("R2 =", round(summary(lm_prod_corr)$adj.r.squared, digits = 3), "\n",
"p =", round(unname(summary(lm_prod_corr)$coefficients[,4][2]), digits = 3))) +
theme(legend.position = "none")
## Warning: Ignoring unknown parameters: width
lm_percell_corr <- lm(log10(part_percell_prod) ~ log10(free_percell_prod), data = byfrac_df)
plot3 <- ggplot(byfrac_df,
aes(x = log10(free_percell_prod), y = log10(part_percell_prod))) +
xlab("Free") + ylab("Particle") +
ggtitle("Log10(Cellular Production) \n (μgC/cell/hr)") +
geom_point(size = 3, fill = "grey", aes(shape = season), width = 0.2) +
scale_shape_manual(values = season_shapes) +
geom_smooth(method = "lm", color = "black") +
annotate("text", y = -5.8, x=-7.9, color = "black", fontface = "bold", size = 4,
label = paste("R2 =", round(summary(lm_percell_corr)$adj.r.squared, digits = 3), "\n",
"p =", round(unname(summary(lm_percell_corr)$coefficients[,4][2]), digits = 3))) +
theme(legend.position = "none")
## Warning: Ignoring unknown parameters: width
legend_s1 <- get_legend(plot1)
## Error in f(...): object 'season_shapes' not found
top_row_S1 <- plot_grid(plot1 +theme(legend.position = "none"), plot2, plot3,
nrow = 1, ncol = 3,
labels = c("A", "B", "C"),
align = "h")
## Warning in f(...): restarting interrupted promise evaluation
## Warning in f(...): restarting interrupted promise evaluation
## Error in f(...): object 'season_shapes' not found
plot_grid(top_row_S1, legend_s1,
rel_heights = c(1, 0.05),
nrow = 2, ncol = 1)
## Error in plot_grid(top_row_S1, legend_s1, rel_heights = c(1, 0.05), nrow = 2, : object 'top_row_S1' not found
Linear Regressions with Fraction Diversity
#################################### Bulk Measure Production ####################################
################### Richness ###################
summary(lm(frac_bacprod ~ mean, data = richness)) # All samples together
##
## Call:
## lm(formula = frac_bacprod ~ mean, data = richness)
##
## Residuals:
## Min 1Q Median 3Q Max
## -15.057 -12.017 -5.654 9.256 46.605
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 18.501168 9.039579 2.047 0.0528 .
## mean -0.003335 0.018911 -0.176 0.8616
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 15.54 on 22 degrees of freedom
## Multiple R-squared: 0.001412, Adjusted R-squared: -0.04398
## F-statistic: 0.0311 on 1 and 22 DF, p-value: 0.8616
# Particle-Associated
lm_prod_vs_rich_PA <- lm(frac_bacprod ~ mean, data = filter(richness, fraction == "Particle"))
summary(lm_prod_vs_rich_PA)
##
## Call:
## lm(formula = frac_bacprod ~ mean, data = filter(richness, fraction ==
## "Particle"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -8.1271 -2.8865 -0.4649 3.7537 9.8401
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -11.269531 5.717450 -1.971 0.07701 .
## mean 0.038125 0.009868 3.863 0.00314 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.476 on 10 degrees of freedom
## Multiple R-squared: 0.5988, Adjusted R-squared: 0.5587
## F-statistic: 14.93 on 1 and 10 DF, p-value: 0.003143
# Cross validate particle-associated for a better estimate of the adjusted R-squared
cv_lm_prod_vs_rich_PA <- train(
frac_bacprod ~ mean, data = filter(richness, fraction == "Particle"),
method ='lm',
trControl = trainControl(method ="repeatedcv", number = 3, repeats = 100),
tuneGrid = expand.grid(intercept = TRUE))
cv_lm_prod_vs_rich_PA$results # Particle-Associated CV results
## intercept RMSE Rsquared RMSESD RsquaredSD
## 1 TRUE 6.722233 0.6674307 2.204547 0.2884269
summary(lm(frac_bacprod ~ mean, data = filter(richness, fraction == "Free"))) # Free Living
##
## Call:
## lm(formula = frac_bacprod ~ mean, data = filter(richness, fraction ==
## "Free"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -22.094 -11.112 -2.755 8.611 33.308
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.40793 21.62006 0.250 0.808
## mean 0.05511 0.06207 0.888 0.395
##
## Residual standard error: 17.7 on 10 degrees of freedom
## Multiple R-squared: 0.07306, Adjusted R-squared: -0.01963
## F-statistic: 0.7882 on 1 and 10 DF, p-value: 0.3955
################### Shannon Entropy ###################
summary(lm(frac_bacprod ~ mean, data = shannon)) # All samples together
##
## Call:
## lm(formula = frac_bacprod ~ mean, data = shannon)
##
## Residuals:
## Min 1Q Median 3Q Max
## -14.659 -11.878 -5.666 9.231 46.593
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 16.63373 26.71652 0.623 0.540
## mean 0.08797 6.22969 0.014 0.989
##
## Residual standard error: 15.55 on 22 degrees of freedom
## Multiple R-squared: 9.064e-06, Adjusted R-squared: -0.04545
## F-statistic: 0.0001994 on 1 and 22 DF, p-value: 0.9889
# Particle-Associated
lm_prod_vs_shannon_PA <- lm(frac_bacprod ~ mean, data = filter(shannon, fraction == "Particle"))
summary(lm_prod_vs_shannon_PA)
##
## Call:
## lm(formula = frac_bacprod ~ mean, data = filter(shannon, fraction ==
## "Particle"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -7.9022 -2.9150 -0.5875 1.6713 12.0544
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -40.320 14.007 -2.878 0.01643 *
## mean 11.089 3.068 3.614 0.00473 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.693 on 10 degrees of freedom
## Multiple R-squared: 0.5664, Adjusted R-squared: 0.5231
## F-statistic: 13.06 on 1 and 10 DF, p-value: 0.004734
# Cross validate particle-associated for a better estimate of the adjusted R-squared
cv_lm_prod_vs_shannon_PA <- train(
frac_bacprod ~ mean, data = filter(shannon, fraction == "Particle"),
method ='lm',
trControl = trainControl(method ="repeatedcv", number = 3, repeats = 100),
tuneGrid = expand.grid(intercept = TRUE))
cv_lm_prod_vs_shannon_PA$results # Particle-Associated CV results
## intercept RMSE Rsquared RMSESD RsquaredSD
## 1 TRUE 6.743204 0.6698453 2.440644 0.283921
summary(lm(frac_bacprod ~ mean, data = filter(shannon, fraction == "Free"))) # Free Living
##
## Call:
## lm(formula = frac_bacprod ~ mean, data = filter(shannon, fraction ==
## "Free"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -22.004 -10.708 -3.738 6.632 37.129
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -13.37 73.87 -0.181 0.860
## mean 9.40 18.50 0.508 0.622
##
## Residual standard error: 18.15 on 10 degrees of freedom
## Multiple R-squared: 0.02516, Adjusted R-squared: -0.07233
## F-statistic: 0.2581 on 1 and 10 DF, p-value: 0.6225
################### Inverse Simpson ###################
summary(lm(frac_bacprod ~ mean, data = invsimps)) # All samples together
##
## Call:
## lm(formula = frac_bacprod ~ mean, data = invsimps)
##
## Residuals:
## Min 1Q Median 3Q Max
## -15.269 -10.298 -4.916 5.866 46.452
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 12.1577 6.0225 2.019 0.0559 .
## mean 0.1629 0.1731 0.941 0.3570
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 15.25 on 22 degrees of freedom
## Multiple R-squared: 0.03868, Adjusted R-squared: -0.005019
## F-statistic: 0.8851 on 1 and 22 DF, p-value: 0.357
# Particle-Associated samples
lm_prod_vs_invsimps_PA <- lm(frac_bacprod ~ mean, data = filter(invsimps, fraction == "Particle"))
summary(lm_prod_vs_invsimps_PA)
##
## Call:
## lm(formula = frac_bacprod ~ mean, data = filter(invsimps, fraction ==
## "Particle"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -7.5589 -2.1093 -0.1969 0.8752 7.8282
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.45199 2.43866 -0.185 0.85666
## mean 0.29344 0.05781 5.076 0.00048 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.571 on 10 degrees of freedom
## Multiple R-squared: 0.7204, Adjusted R-squared: 0.6925
## F-statistic: 25.77 on 1 and 10 DF, p-value: 0.0004804
# Cross validate particle-associated for a better estimate of the adjusted R-squared
cv_lm_prod_vs_invsimps_PA <- train(
frac_bacprod ~ mean, data = filter(invsimps, fraction == "Particle"),
method ='lm',
trControl = trainControl(method ="repeatedcv", number = 3, repeats = 100),
tuneGrid = expand.grid(intercept = TRUE))
cv_lm_prod_vs_invsimps_PA$results # Cross Validated PA results
## intercept RMSE Rsquared RMSESD RsquaredSD
## 1 TRUE 5.257223 0.755187 2.091455 0.2576904
#Free Living Samples
summary(lm(frac_bacprod ~ mean, data = filter(invsimps, fraction == "Free")))
##
## Call:
## lm(formula = frac_bacprod ~ mean, data = filter(invsimps, fraction ==
## "Free"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -20.765 -9.356 -4.445 6.116 36.017
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 11.0985 16.7052 0.664 0.521
## mean 0.5379 0.6598 0.815 0.434
##
## Residual standard error: 17.8 on 10 degrees of freedom
## Multiple R-squared: 0.06233, Adjusted R-squared: -0.03143
## F-statistic: 0.6648 on 1 and 10 DF, p-value: 0.4339
################### Simpson's Evenness ###################
summary(lm(frac_bacprod ~ mean, data = simpseven)) # All samples together
##
## Call:
## lm(formula = frac_bacprod ~ mean, data = simpseven)
##
## Residuals:
## Min 1Q Median 3Q Max
## -17.000 -7.163 -3.815 3.392 45.845
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.8524 9.2286 -0.092 0.9272
## mean 274.1606 134.4253 2.040 0.0536 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 14.26 on 22 degrees of freedom
## Multiple R-squared: 0.159, Adjusted R-squared: 0.1208
## F-statistic: 4.16 on 1 and 22 DF, p-value: 0.05359
# Particle-Associated
lm_prod_vs_simpseven_PA <- lm(frac_bacprod ~ mean, data = filter(simpseven, fraction == "Particle"))
summary(lm_prod_vs_simpseven_PA)
##
## Call:
## lm(formula = frac_bacprod ~ mean, data = filter(simpseven, fraction ==
## "Particle"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -10.973 -2.229 -1.086 1.356 12.380
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -3.829 4.539 -0.844 0.41865
## mean 234.057 71.238 3.286 0.00821 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.995 on 10 degrees of freedom
## Multiple R-squared: 0.5191, Adjusted R-squared: 0.471
## F-statistic: 10.79 on 1 and 10 DF, p-value: 0.008211
# Cross validate particle-associated for a better estimate of the adjusted R-squared
cv_lm_prod_vs_simpseven_PA <- train(
frac_bacprod ~ mean, data = filter(simpseven, fraction == "Particle"),
method ='lm',
trControl = trainControl(method ="repeatedcv", number = 3, repeats = 100),
tuneGrid = expand.grid(intercept = TRUE))
cv_lm_prod_vs_simpseven_PA$results # Particle-Associated CV results
## intercept RMSE Rsquared RMSESD RsquaredSD
## 1 TRUE 6.225165 0.6598027 3.078422 0.3241796
summary(lm(frac_bacprod ~ mean, data = filter(simpseven, fraction == "Free"))) # Free Living
##
## Call:
## lm(formula = frac_bacprod ~ mean, data = filter(simpseven, fraction ==
## "Free"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -19.389 -12.029 -3.306 4.668 39.946
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 15.85 23.52 0.674 0.516
## mean 114.96 321.02 0.358 0.728
##
## Residual standard error: 18.27 on 10 degrees of freedom
## Multiple R-squared: 0.01266, Adjusted R-squared: -0.08607
## F-statistic: 0.1282 on 1 and 10 DF, p-value: 0.7277
#################################### Per-Cell Production ####################################
################### Richness ###################
summary(lm(log10(fracprod_per_cell_noinf) ~ mean, data = richness)) # All samples together
##
## Call:
## lm(formula = log10(fracprod_per_cell_noinf) ~ mean, data = richness)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.8668 -0.2124 0.1045 0.2133 0.6473
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -8.4591277 0.2212633 -38.231 < 2e-16 ***
## mean 0.0028988 0.0004641 6.246 3.4e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3804 on 21 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.6501, Adjusted R-squared: 0.6334
## F-statistic: 39.02 on 1 and 21 DF, p-value: 3.395e-06
# Particle-Associated
lm_percell_prod_vs_rich_PA <- lm(log10(fracprod_per_cell_noinf) ~ mean, data = filter(richness, fraction == "Particle"))
summary(lm_percell_prod_vs_rich_PA)
##
## Call:
## lm(formula = log10(fracprod_per_cell_noinf) ~ mean, data = filter(richness,
## fraction == "Particle"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.55347 -0.21545 -0.01066 0.12536 0.58830
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -8.0617648 0.3717671 -21.685 4.44e-09 ***
## mean 0.0023794 0.0006348 3.748 0.00457 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3506 on 9 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.6095, Adjusted R-squared: 0.5662
## F-statistic: 14.05 on 1 and 9 DF, p-value: 0.004567
# Cross validate particle-associated for a better estimate of the adjusted R-squared
cv_lm_percell_prod_vs_rich_PA <- train(
log10(fracprod_per_cell_noinf) ~ mean, data = filter(richness, fraction == "Particle" & norep_filter_name != "MOTEJ515"),
method ='lm',
trControl = trainControl(method ="repeatedcv", number = 3, repeats = 100),
tuneGrid = expand.grid(intercept = TRUE))
cv_lm_percell_prod_vs_rich_PA$results # Particle-Associated CV results
## intercept RMSE Rsquared RMSESD RsquaredSD
## 1 TRUE 0.4259432 0.6442049 0.1528492 0.3200838
summary(lm(log10(fracprod_per_cell_noinf) ~ mean, data = filter(richness, fraction == "Free"))) # Free Living
##
## Call:
## lm(formula = log10(fracprod_per_cell_noinf) ~ mean, data = filter(richness,
## fraction == "Free"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.71719 -0.13833 0.07155 0.23581 0.56949
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -8.135758 0.471253 -17.264 8.99e-09 ***
## mean 0.001657 0.001353 1.225 0.249
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3859 on 10 degrees of freedom
## Multiple R-squared: 0.1304, Adjusted R-squared: 0.04344
## F-statistic: 1.499 on 1 and 10 DF, p-value: 0.2488
################### Shannon Entropy ###################
summary(lm(log10(fracprod_per_cell_noinf) ~ mean, data = shannon)) # All samples together
##
## Call:
## lm(formula = log10(fracprod_per_cell_noinf) ~ mean, data = shannon)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.03514 -0.15042 -0.03394 0.26568 0.82794
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -10.9036 0.7534 -14.472 2.14e-12 ***
## mean 0.8805 0.1763 4.993 6.09e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4348 on 21 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.5428, Adjusted R-squared: 0.521
## F-statistic: 24.93 on 1 and 21 DF, p-value: 6.09e-05
# Particle-Associated
lm_percell_prod_vs_shannon_PA <- lm(log10(fracprod_per_cell_noinf) ~ mean, data = filter(shannon, fraction == "Particle"))
summary(lm_percell_prod_vs_shannon_PA)
##
## Call:
## lm(formula = log10(fracprod_per_cell_noinf) ~ mean, data = filter(shannon,
## fraction == "Particle"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.36686 -0.23571 -0.01330 0.03961 0.70312
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -9.8897 0.8820 -11.213 1.37e-06 ***
## mean 0.6993 0.1935 3.614 0.00562 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3584 on 9 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.5921, Adjusted R-squared: 0.5468
## F-statistic: 13.06 on 1 and 9 DF, p-value: 0.00562
# Cross validate particle-associated for a better estimate of the adjusted R-squared
cv_lm_percell_prod_vs_shannon_PA <- train(
log10(fracprod_per_cell_noinf) ~ mean, data = filter(shannon, fraction == "Particle" & norep_filter_name != "MOTEJ515"),
method ='lm',
trControl = trainControl(method ="repeatedcv", number = 3, repeats = 100),
tuneGrid = expand.grid(intercept = TRUE))
cv_lm_percell_prod_vs_shannon_PA$results # Particle-Associated CV results
## intercept RMSE Rsquared RMSESD RsquaredSD
## 1 TRUE 0.4165133 0.6923215 0.1364644 0.282022
summary(lm(log10(fracprod_per_cell_noinf) ~ mean, data = filter(shannon, fraction == "Free"))) # Free Living
##
## Call:
## lm(formula = log10(fracprod_per_cell_noinf) ~ mean, data = filter(shannon,
## fraction == "Free"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.72444 -0.17785 0.08114 0.13946 0.67366
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -8.8666 1.6332 -5.429 0.000289 ***
## mean 0.3243 0.4091 0.793 0.446298
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4014 on 10 degrees of freedom
## Multiple R-squared: 0.05914, Adjusted R-squared: -0.03495
## F-statistic: 0.6285 on 1 and 10 DF, p-value: 0.4463
################### Inverse Simpson ###################
summary(lm(log10(fracprod_per_cell_noinf) ~ mean, data = invsimps)) # All samples together
##
## Call:
## lm(formula = log10(fracprod_per_cell_noinf) ~ mean, data = invsimps)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.97302 -0.28199 -0.05285 0.32003 0.99088
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -7.86039 0.18153 -43.301 < 2e-16 ***
## mean 0.02355 0.00525 4.485 0.000204 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4596 on 21 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.4893, Adjusted R-squared: 0.4649
## F-statistic: 20.12 on 1 and 21 DF, p-value: 0.0002037
# Particle-Associated
lm_percell_prod_vs_invsimps_PA <- lm(log10(fracprod_per_cell_noinf) ~ mean, data = filter(invsimps, fraction == "Particle"))
summary(lm_percell_prod_vs_invsimps_PA)
##
## Call:
## lm(formula = log10(fracprod_per_cell_noinf) ~ mean, data = filter(invsimps,
## fraction == "Particle"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.28651 -0.18384 -0.11125 0.07337 0.56444
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -7.360961 0.159490 -46.153 5.27e-12 ***
## mean 0.018087 0.003759 4.812 0.000958 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2969 on 9 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.7201, Adjusted R-squared: 0.689
## F-statistic: 23.15 on 1 and 9 DF, p-value: 0.0009581
# Cross validate particle-associated for a better estimate of the adjusted R-squared
cv_lm_percell_prod_vs_invsimps_PA <- train(
log10(fracprod_per_cell_noinf) ~ mean, data = filter(invsimps, fraction == "Particle" & norep_filter_name != "MOTEJ515"),
method ='lm',
trControl = trainControl(method ="repeatedcv", number = 3, repeats = 100),
tuneGrid = expand.grid(intercept = TRUE))
cv_lm_percell_prod_vs_invsimps_PA$results # Particle-Associated CV results
## intercept RMSE Rsquared RMSESD RsquaredSD
## 1 TRUE 0.3530999 0.7312218 0.1134866 0.3167412
summary(lm(log10(fracprod_per_cell_noinf) ~ mean, data = filter(invsimps, fraction == "Free"))) # Free Living
##
## Call:
## lm(formula = log10(fracprod_per_cell_noinf) ~ mean, data = filter(invsimps,
## fraction == "Free"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.68269 -0.11512 0.01325 0.17742 0.63175
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -8.03513 0.35685 -22.517 6.72e-10 ***
## mean 0.01910 0.01409 1.355 0.205
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3803 on 10 degrees of freedom
## Multiple R-squared: 0.1551, Adjusted R-squared: 0.07064
## F-statistic: 1.836 on 1 and 10 DF, p-value: 0.2052
################### Simpson's Evenness ###################
summary(lm(log10(fracprod_per_cell_noinf) ~ mean, data = simpseven)) # All samples together
##
## Call:
## lm(formula = log10(fracprod_per_cell_noinf) ~ mean, data = simpseven)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.06874 -0.41920 0.04553 0.34511 1.56565
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -7.4496 0.4116 -18.10 2.74e-14 ***
## mean 4.3470 6.0344 0.72 0.479
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.6353 on 21 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.02411, Adjusted R-squared: -0.02236
## F-statistic: 0.5189 on 1 and 21 DF, p-value: 0.4792
# Particle-Associated
lm_percell_prod_vs_simpseven_PA <- lm(log10(fracprod_per_cell_noinf) ~ mean, data = filter(simpseven, fraction == "Particle"))
summary(lm_percell_prod_vs_simpseven_PA)
##
## Call:
## lm(formula = log10(fracprod_per_cell_noinf) ~ mean, data = filter(simpseven,
## fraction == "Particle"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.4726 -0.2230 -0.1106 0.1248 0.6887
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -7.5941 0.2885 -26.324 7.96e-10 ***
## mean 15.1887 4.6341 3.278 0.00957 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3788 on 9 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.5441, Adjusted R-squared: 0.4935
## F-statistic: 10.74 on 1 and 9 DF, p-value: 0.009566
# Cross validate particle-associated for a better estimate of the adjusted R-squared
cv_lm_percell_prod_vs_simpseven_PA <- train(
log10(fracprod_per_cell_noinf) ~ mean, data = filter(simpseven, fraction == "Particle" & norep_filter_name != "MOTEJ515"),
method ='lm',
trControl = trainControl(method ="repeatedcv", number = 3, repeats = 100),
tuneGrid = expand.grid(intercept = TRUE))
cv_lm_percell_prod_vs_simpseven_PA$results # Particle-Associated CV results
## intercept RMSE Rsquared RMSESD RsquaredSD
## 1 TRUE 0.4449297 0.6843005 0.1423662 0.3106107
summary(lm(log10(fracprod_per_cell_noinf) ~ mean, data = filter(simpseven, fraction == "Free"))) # Free Living
##
## Call:
## lm(formula = log10(fracprod_per_cell_noinf) ~ mean, data = filter(simpseven,
## fraction == "Free"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.63112 -0.24216 0.01388 0.14672 0.77426
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -7.9274 0.5202 -15.240 3e-08 ***
## mean 4.9361 7.1008 0.695 0.503
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4041 on 10 degrees of freedom
## Multiple R-squared: 0.04609, Adjusted R-squared: -0.0493
## F-statistic: 0.4832 on 1 and 10 DF, p-value: 0.5028
R2 table
########## PUT A TABLE TOGETHER
# Per Liter Production
perliter_row1 <- c("Richness", "Per-Liter",
round(summary(lm_prod_vs_rich_PA)$adj.r.squared, digits = 3),
round(cv_lm_prod_vs_rich_PA$results$Rsquared, digits = 3),
round(cv_lm_prod_vs_rich_PA$results$RsquaredSD, digits = 3))
perliter_row2 <- c("Shannon_Entropy", "Per-Liter",
round(summary(lm_prod_vs_shannon_PA)$adj.r.squared, digits = 3),
round(cv_lm_prod_vs_shannon_PA$results$Rsquared, digits = 3),
round(cv_lm_prod_vs_shannon_PA$results$RsquaredSD, digits = 3))
perliter_row3 <- c("Inverse_Simpson", "Per-Liter",
round(summary(lm_prod_vs_invsimps_PA)$adj.r.squared, digits = 3),
round(cv_lm_prod_vs_invsimps_PA$results$Rsquared, digits = 3),
round(cv_lm_prod_vs_invsimps_PA$results$RsquaredSD, digits = 3))
perliter_row4 <- c("Simpsons_Evenness","Per-Liter",
round(summary(lm_prod_vs_simpseven_PA)$adj.r.squared, digits = 3),
round(cv_lm_prod_vs_simpseven_PA$results$Rsquared, digits = 3),
round(cv_lm_prod_vs_simpseven_PA$results$RsquaredSD, digits = 3))
# Per cell production
percell_row1 <- c("Richness", "Per-Cell",
round(summary(lm_percell_prod_vs_rich_PA)$adj.r.squared, digits = 3),
round(cv_lm_percell_prod_vs_rich_PA$results$Rsquared, digits = 3),
round(cv_lm_percell_prod_vs_rich_PA$results$RsquaredSD, digits = 3))
percell_row2 <- c("Shannon_Entropy", "Per-Cell",
round(summary(lm_percell_prod_vs_shannon_PA)$adj.r.squared, digits = 3),
round(cv_lm_percell_prod_vs_shannon_PA$results$Rsquared, digits = 3),
round(cv_lm_percell_prod_vs_shannon_PA$results$RsquaredSD, digits = 3))
percell_row3 <- c("Inverse_Simpson", "Per-Cell",
round(summary(lm_percell_prod_vs_invsimps_PA)$adj.r.squared, digits = 3),
round(cv_lm_percell_prod_vs_invsimps_PA$results$Rsquared, digits = 3),
round(cv_lm_percell_prod_vs_invsimps_PA$results$RsquaredSD, digits = 3))
percell_row4 <- c("Simpsons_Evenness", "Per-Cell",
round(summary(lm_percell_prod_vs_simpseven_PA)$adj.r.squared, digits = 3),
round(cv_lm_percell_prod_vs_simpseven_PA$results$Rsquared, digits = 3),
round(cv_lm_percell_prod_vs_simpseven_PA$results$RsquaredSD, digits = 3))
r2_table <- as.data.frame(rbind(perliter_row1, perliter_row2, perliter_row3, perliter_row4,
percell_row1, percell_row2, percell_row3, percell_row4))
colnames(r2_table) <- c("Diversity_Metric", "Production_Measure","Adjusted_R2","CV_R2", "CV_R2_SD")
row.names(r2_table) = NULL
pander(r2_table,
caption = "Supplemental Table 2: \n R2 estimates for heterotrophic production vs particle-associated diversity linear regressions.")
Supplemental Table 2: R2 estimates for heterotrophic production vs particle-associated diversity linear regressions.
| Richness |
Per-Liter |
0.559 |
0.667 |
0.288 |
| Shannon_Entropy |
Per-Liter |
0.523 |
0.67 |
0.284 |
| Inverse_Simpson |
Per-Liter |
0.692 |
0.755 |
0.258 |
| Simpsons_Evenness |
Per-Liter |
0.471 |
0.66 |
0.324 |
| Richness |
Per-Cell |
0.566 |
0.644 |
0.32 |
| Shannon_Entropy |
Per-Cell |
0.547 |
0.692 |
0.282 |
| Inverse_Simpson |
Per-Cell |
0.689 |
0.731 |
0.317 |
| Simpsons_Evenness |
Per-Cell |
0.493 |
0.684 |
0.311 |
Prepare Figure 2
######################################################### OBSERVED RICHNESS
rich_fraction_wilcox <- wilcox.test(mean ~ fraction, data = richness)
rich_fraction_wilcox
##
## Wilcoxon rank sum test
##
## data: mean by fraction
## W = 129, p-value = 0.0004955
## alternative hypothesis: true location shift is not equal to 0
filter(richness) %>%
group_by(fraction) %>%
summarize(mean(mean), sd(mean))
## # A tibble: 2 x 3
## fraction `mean(mean)` `sd(mean)`
## <fctr> <dbl> <dbl>
## 1 Particle 556.7992 167.31404
## 2 Free 338.4242 85.98665
# Make a data frame to draw significance line in boxplot (visually calculated)
rich_nums <- data.frame(a = c(1.15,1.15,1.85,1.85), b = c(920, 930, 930, 920)) # WholePart vs WholeFree
rich_distribution_plot <-
ggplot(richness, aes(y = mean, x = fraction)) +
scale_fill_manual(values = fraction_colors) +
geom_jitter(size = 3, shape = 21, aes(fill = fraction), width = 0.2) +
geom_boxplot(alpha = 0.5, outlier.shape = NA, aes(fill = fraction)) +
scale_y_continuous(limits = c(150,950), breaks = seq(from = 0, to =925, by = 150)) +
xlab("Observed Richness") + xlab("Fraction") +
geom_path(data = rich_nums, aes(x = a, y = b), linetype = 1, color = "#424645") +
annotate("text", x=1.5, y=790, fontface = "bold", size = 4, color = "#424645",
label= paste("***\np =", round(rich_fraction_wilcox$p.value, digits = 5))) +
theme(legend.position = "none",# axis.title.y = element_blank(),
axis.title.x = element_blank(), axis.text.x = element_blank()) +
coord_flip()
# Richness vs Per-Liter Heterotrophic Production Plot
prod_vs_rich_plot <-
ggplot(richness, aes(x=mean, y=frac_bacprod)) +
geom_errorbarh(aes(xmin = mean - sd, xmax = mean + sd, color = fraction), alpha = 0.7) + # X-axis errorbars
geom_errorbar(aes(ymin = frac_bacprod - SD_frac_bacprod, ymax = frac_bacprod + SD_frac_bacprod, color = fraction)) + # Y-axis errorbars
geom_point(size = 3.5, shape = 21, color = "black", aes(fill = fraction)) +
scale_color_manual(values = fraction_colors) +
scale_fill_manual(values = fraction_colors) +
ylab("Heterotrophic Production \n (μgC/L/hr)") +
xlab("Observed Richness") +
geom_smooth(data=filter(richness, fraction == "Particle"), method='lm', color = "#FF6600", fill = "#FF6600") +
scale_x_continuous(limits = c(150,950), breaks = seq(from = 0, to =925, by = 150)) +
annotate("text", x = 790, y=45, color = "#FF6600", fontface = "bold", size = 4,
label = paste("R2 =", round(summary(lm_prod_vs_rich_PA)$adj.r.squared, digits = 2), "\n",
"p =", round(unname(summary(lm_prod_vs_rich_PA)$coefficients[,4][2]), digits = 3))) +
theme(legend.position = "none", axis.title.x = element_blank(), axis.text.x = element_blank())
# Richness vs Per-cell herterotrophic production Plot
percell_prod_vs_rich_plot <-
ggplot(richness, aes(x=mean, y=log10(fracprod_per_cell_noinf))) +
geom_errorbarh(aes(xmin = mean - sd, xmax = mean + sd, color = fraction), alpha = 0.7) + # X-axis errorbars
geom_point(size = 3.5, shape = 21, color = "black", aes(fill = fraction)) +
scale_color_manual(values = fraction_colors) +
scale_fill_manual(values = fraction_colors) +
ylab("log10(production/cell)\n (μgC/cell/hr)") +
xlab("Observed Richness") +
geom_smooth(data=filter(richness, fraction == "Particle"), method='lm', color = "#FF6600", fill = "#FF6600") +
scale_x_continuous(limits = c(150,950), breaks = seq(from = 0, to =925, by = 150)) +
#scale_y_continuous(limits = c(-8e-7,5e-6), breaks = seq(from = 0, to = 6e-7, by = 3e-7)) +
annotate("text", x = 790, y=-7.5, color = "#FF6600", fontface = "bold", size = 4,
label = paste("R2 =", round(summary(lm_percell_prod_vs_rich_PA)$adj.r.squared, digits = 2), "\n",
"p =", round(unname(summary(lm_percell_prod_vs_rich_PA)$coefficients[,4][2]), digits = 3))) +
#annotate("text", x = 250, y=55, color = "skyblue", fontface = "bold", label = paste("Free = NS")) +
theme(legend.title = element_blank(), legend.position ="bottom",
legend.text = element_text(size = 14))
# All 3 richness plots together
rich_plots <- plot_grid(rich_distribution_plot, prod_vs_rich_plot, percell_prod_vs_rich_plot,
labels = c("A", "C", "E"), ncol = 1, nrow = 3,
rel_heights = c(0.5, 0.8, 1.1),
align = "v")
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font metrics unknown for Unicode character U+03bc
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): conversion failure on ' (μgC/L/hr)' in 'mbcsToSbcs': dot substituted for <ce>
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): conversion failure on ' (μgC/L/hr)' in 'mbcsToSbcs': dot substituted for <bc>
## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_errorbarh).
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font metrics unknown for Unicode character U+03bc
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): conversion failure on ' (μgC/cell/hr)' in 'mbcsToSbcs': dot substituted for <ce>
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): conversion failure on ' (μgC/cell/hr)' in 'mbcsToSbcs': dot substituted for <bc>
######################################################### INVERSE SIMPSON
invsimps_fraction_wilcox <- wilcox.test(mean ~ fraction, data = invsimps)
invsimps_fraction_wilcox
##
## Wilcoxon rank sum test
##
## data: mean by fraction
## W = 81, p-value = 0.6297
## alternative hypothesis: true location shift is not equal to 0
filter(invsimps) %>%
group_by(fraction) %>%
summarize(mean(mean), sd(mean))
## # A tibble: 2 x 3
## fraction `mean(mean)` `sd(mean)`
## <fctr> <dbl> <dbl>
## 1 Particle 35.47659 23.843325
## 2 Free 24.09219 8.136901
# Make a data frame to draw significance line in boxplot (visually calculated)
invsimps_nums <- data.frame(a = c(1.15,1.15,1.85,1.85), b = c(83,85,85,83)) # WholePart vs WholeFree
invsimps_distribution_plot <-
ggplot(invsimps, aes(y = mean, x = fraction)) +
scale_fill_manual(values = fraction_colors) +
geom_jitter(size = 3, shape = 21, aes(fill = fraction), width = 0.2) +
geom_boxplot(alpha = 0.5, outlier.shape = NA, aes(fill = fraction)) +
scale_y_continuous(limits = c(0,85), breaks = seq(from = 0, to = 85, by = 20)) +
ylab("Inverse Simpson") + xlab("Fraction") +
geom_path(data = invsimps_nums, aes(x = a, y = b), linetype = 1, color = "#424645") +
annotate("text", x=1.5, y=80, fontface = "bold", size = 4, color = "#424645", label= "NS") +
theme(legend.position = "none", #axis.title.y = element_blank(),
axis.title.x = element_blank(), axis.text.x = element_blank()) +
coord_flip()
# INVERSE SIMPSON
# Plot Inverse Simpson vs Per-Liter Heterotrophic Production
prod_vs_invsimps_plot <-
ggplot(invsimps, aes(x=mean, y=frac_bacprod)) +
geom_errorbarh(aes(xmin = mean - sd, xmax = mean + sd, color = fraction), alpha = 0.7) + # X-axis errorbars
geom_errorbar(aes(ymin = frac_bacprod - SD_frac_bacprod, ymax = frac_bacprod + SD_frac_bacprod, color = fraction)) + # Y-axis errorbars
geom_point(size = 3.5, shape = 21, color = "black", aes(fill = fraction)) +
scale_color_manual(values = fraction_colors) +
scale_fill_manual(values = fraction_colors) +
ylab("Heterotrophic Production \n (μgC/L/hr)") +
xlab("Inverse Simpson") +
geom_smooth(data=filter(invsimps, fraction == "Particle"), method='lm', color = "#FF6600", fill = "#FF6600") +
scale_x_continuous(limits = c(0,85), breaks = seq(from = 0, to = 85, by = 20)) +
annotate("text", x = 70, y=45, color = "#FF6600", fontface = "bold",size = 4,
label = paste("R2 =", round(summary(lm_prod_vs_invsimps_PA)$adj.r.squared, digits = 2), "\n",
"p =", round(unname(summary(lm_prod_vs_invsimps_PA)$coefficients[,4][2]), digits = 4))) +
theme(legend.position = "none", axis.title.x = element_blank(), axis.text.x = element_blank())
# Inverse Simpson vs per-cell production Plot
percell_prod_vs_invsimps_plot <-
ggplot(invsimps, aes(x=mean, y=log10(fracprod_per_cell_noinf))) +
geom_errorbarh(aes(xmin = mean - sd, xmax = mean + sd, color = fraction), alpha = 0.7) + # X-axis errorbars
geom_point(size = 3.5, shape = 21, color = "black", aes(fill = fraction)) +
scale_color_manual(values = fraction_colors) +
scale_fill_manual(values = fraction_colors) +
ylab("log10(production/cell)\n (μgC/cell/hr)") +
xlab("Inverse Simpson") +
geom_smooth(data=filter(invsimps, fraction == "Particle"), method='lm', color = "#FF6600", fill = "#FF6600") +
scale_x_continuous(limits = c(0,85), breaks = seq(from = 0, to = 85, by = 20)) +
#scale_y_continuous(limits = c(-8e-7,5e-6), breaks = seq(from = 0, to = 6e-7, by = 3e-7)) +
annotate("text", x = 70, y=-7.5, color = "#FF6600", fontface = "bold",size = 4,
label = paste("R2 =", round(summary(lm_percell_prod_vs_invsimps_PA)$adj.r.squared, digits = 2), "\n",
"p =", round(unname(summary(lm_percell_prod_vs_invsimps_PA)$coefficients[,4][2]), digits = 4))) +
theme(legend.title = element_blank(), legend.position ="bottom",
legend.text = element_text(size = 14))
# Plot Inverse simpson plots altogether
invsimps_plots <- plot_grid(invsimps_distribution_plot, prod_vs_invsimps_plot, percell_prod_vs_invsimps_plot,
labels = c("B", "D", "F"), ncol = 1, nrow = 3,
rel_heights = c(0.5, 0.8, 1.1),
align = "v")
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font metrics unknown for Unicode character U+03bc
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): conversion failure on ' (μgC/L/hr)' in 'mbcsToSbcs': dot substituted for <ce>
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): conversion failure on ' (μgC/L/hr)' in 'mbcsToSbcs': dot substituted for <bc>
## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_errorbarh).
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font metrics unknown for Unicode character U+03bc
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): conversion failure on ' (μgC/cell/hr)' in 'mbcsToSbcs': dot substituted for <ce>
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): conversion failure on ' (μgC/cell/hr)' in 'mbcsToSbcs': dot substituted for <bc>
Prepare Figure S2
######################################################### SHANNON_ENTROPY
shannon_fraction_wilcox <- wilcox.test(mean ~ fraction, data = shannon)
shannon_fraction_wilcox
##
## Wilcoxon rank sum test
##
## data: mean by fraction
## W = 119, p-value = 0.00556
## alternative hypothesis: true location shift is not equal to 0
filter(shannon) %>%
group_by(fraction) %>%
summarize(mean(mean), sd(mean))
## # A tibble: 2 × 3
## fraction `mean(mean)` `sd(mean)`
## <fctr> <dbl> <dbl>
## 1 Particle 4.534078 0.5594638
## 2 Free 3.982314 0.2958221
# Make a data frame to draw significance line in boxplot (visually calculated)
shannon_nums <- data.frame(a = c(1.15,1.15,1.85,1.85), b = c(5.8, 5.9, 5.9, 5.8)) # WholePart vs WholeFree
shannon_distribution_plot <-
ggplot(shannon, aes(y = mean, x = fraction)) +
scale_fill_manual(values = fraction_colors) +
geom_jitter(size = 3, shape = 21, aes(fill = fraction), width = 0.2) +
geom_boxplot(alpha = 0.5, outlier.shape = NA, aes(fill = fraction)) +
scale_y_continuous(limits = c(3.5,6), breaks = seq(from = 3, to =6, by = 0.5)) +
xlab("Shannon Entropy") +
xlab("Fraction") +
# Add line and pval
geom_path(data = shannon_nums, aes(x = a, y = b), linetype = 1, color = "#424645") +
annotate("text", x=1.5, y=5.6, fontface = "bold", size = 4, color = "#424645",
label= paste("***\np =", round(shannon_fraction_wilcox$p.value, digits = 3))) +
theme(legend.position = "none",# axis.title.y = element_blank(),
axis.title.x = element_blank(), axis.text.x = element_blank()) +
coord_flip()
# Shannon vs Per-Liter Heterotrophic Production Plot
prod_vs_shannon_plot <-
ggplot(shannon, aes(x=mean, y=frac_bacprod)) +
geom_errorbarh(aes(xmin = mean - sd, xmax = mean + sd, color = fraction), alpha = 0.7) + # X-axis errorbars
geom_errorbar(aes(ymin = frac_bacprod - SD_frac_bacprod, ymax = frac_bacprod + SD_frac_bacprod, color = fraction)) + # Y-axis errorbars
geom_point(size = 3.5, shape = 21, color = "black", aes(fill = fraction)) +
scale_color_manual(values = fraction_colors) +
scale_fill_manual(values = fraction_colors) +
ylab("Heterotrophic Production \n (μgC/L/hr)") +
scale_x_continuous(limits = c(3.5,6), breaks = seq(from = 3, to =6, by = 0.5)) +
xlab("Shannon Entropy") +
geom_smooth(data=filter(shannon, fraction == "Particle"), method='lm', color = "#FF6600", fill = "#FF6600") +
annotate("text", x = 5.25, y=45, color = "#FF6600", fontface = "bold", size = 4,
label = paste("R2 =", round(summary(lm_prod_vs_shannon_PA)$adj.r.squared, digits = 2), "\n",
"p =", round(unname(summary(lm_prod_vs_shannon_PA)$coefficients[,4][2]), digits = 3))) +
theme(legend.position = "none", axis.title.x = element_blank(), axis.text.x = element_blank())
# Richness vs Per-cell herterotrophic production Plot
percell_prod_vs_shannon_plot <-
ggplot(shannon, aes(x=mean, y=log10(fracprod_per_cell_noinf))) +
geom_errorbarh(aes(xmin = mean - sd, xmax = mean + sd, color = fraction), alpha = 0.7) + # X-axis errorbars
geom_point(size = 3.5, shape = 21, color = "black", aes(fill = fraction)) +
scale_color_manual(values = fraction_colors) +
scale_fill_manual(values = fraction_colors) +
ylab("log10(production/cell)\n (μgC/cell/hr)") +
xlab("Shannon Entropy") +
geom_smooth(data=filter(shannon, fraction == "Particle"), method='lm', color = "#FF6600", fill = "#FF6600") +
scale_x_continuous(limits = c(3.5,6), breaks = seq(from = 3, to =6, by = 0.5)) +
#scale_y_continuous(limits = c(-8e-7,5e-6), breaks = seq(from = 0, to = 6e-7, by = 3e-7)) +
annotate("text", x = 5.25, y=-7.5, color = "#FF6600", fontface = "bold", size = 4,
label = paste("R2 =", round(summary(lm_percell_prod_vs_shannon_PA)$adj.r.squared, digits = 2), "\n",
"p =", round(unname(summary(lm_percell_prod_vs_shannon_PA)$coefficients[,4][2]), digits = 3))) +
#annotate("text", x = 250, y=55, color = "skyblue", fontface = "bold", label = paste("Free = NS")) +
theme(legend.title = element_blank(), legend.position ="bottom",
legend.text = element_text(size = 14))
shannon_plots <- plot_grid(shannon_distribution_plot, prod_vs_shannon_plot, percell_prod_vs_shannon_plot,
labels = c("A", "C", "E"), ncol = 1, nrow = 3,
rel_heights = c(0.5, 0.8, 1.1),
align = "v")
## Warning: Removed 1 rows containing non-finite values (stat_boxplot).
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing missing values (geom_errorbarh).
## Warning: Removed 1 rows containing missing values (geom_errorbar).
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font metrics unknown for Unicode character U+03bc
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): conversion failure on ' (μgC/L/hr)' in 'mbcsToSbcs': dot substituted for <ce>
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): conversion failure on ' (μgC/L/hr)' in 'mbcsToSbcs': dot substituted for <bc>
## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Warning: Removed 2 rows containing missing values (geom_errorbarh).
## Warning: Removed 2 rows containing missing values (geom_point).
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font metrics unknown for Unicode character U+03bc
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): conversion failure on ' (μgC/cell/hr)' in 'mbcsToSbcs': dot substituted for <ce>
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): conversion failure on ' (μgC/cell/hr)' in 'mbcsToSbcs': dot substituted for <bc>
######################################################### INVERSE SIMPSON
simpseven_fraction_wilcox <- wilcox.test(mean ~ fraction, data = simpseven)
simpseven_fraction_wilcox
##
## Wilcoxon rank sum test
##
## data: mean by fraction
## W = 55, p-value = 0.3474
## alternative hypothesis: true location shift is not equal to 0
filter(simpseven) %>%
group_by(fraction) %>%
summarize(mean(mean), sd(mean))
## # A tibble: 2 × 3
## fraction `mean(mean)` `sd(mean)`
## <fctr> <dbl> <dbl>
## 1 Particle 0.05890559 0.02537484
## 2 Free 0.07138806 0.01716014
# Make a data frame to draw significance line in boxplot (visually calculated)
simpseven_nums <- data.frame(a = c(1.15,1.15,1.85,1.85), b = c(0.14, 0.15, 0.15, 0.14)) # WholePart vs WholeFree
simpseven_distribution_plot <-
ggplot(simpseven, aes(y = mean, x = fraction)) +
scale_fill_manual(values = fraction_colors) +
geom_jitter(size = 3, shape = 21, aes(fill = fraction), width = 0.2) +
geom_boxplot(alpha = 0.5, outlier.shape = NA, aes(fill = fraction)) +
scale_y_continuous(limits = c(0,0.15), breaks = seq(from = 0, to = 0.15, by = 0.03)) +
ylab("Simpson's Evenness") +
xlab("Fraction") +
geom_path(data = simpseven_nums, aes(x = a, y = b), linetype = 1, color = "#424645") +
annotate("text", x=1.5, y=0.14, fontface = "bold", size = 4, color = "#424645", label= "NS") +
theme(legend.position = "none",
axis.title.x = element_blank(), axis.text.x = element_blank()) +
coord_flip()
# SIMPSONS EVENNESS
# Plot Inverse Simpson vs Per-Liter Heterotrophic Production
prod_vs_simpseven_plot <-
ggplot(simpseven, aes(x=mean, y=frac_bacprod)) +
geom_errorbarh(aes(xmin = mean - sd, xmax = mean + sd, color = fraction), alpha = 0.7) + # X-axis errorbars
geom_errorbar(aes(ymin = frac_bacprod - SD_frac_bacprod, ymax = frac_bacprod + SD_frac_bacprod, color = fraction)) + # Y-axis errorbars
geom_point(size = 3.5, shape = 21, color = "black", aes(fill = fraction)) +
scale_color_manual(values = fraction_colors) +
scale_fill_manual(values = fraction_colors) +
ylab("Heterotrophic Production \n (μgC/L/hr)") +
ylab("Simpson's Evenness") +
geom_smooth(data=filter(simpseven, fraction == "Particle"), method='lm', color = "#FF6600", fill = "#FF6600") +
scale_x_continuous(limits = c(0,0.15), breaks = seq(from = 0, to = 0.15, by = 0.03)) +
annotate("text", x = 0.14, y=15, color = "#FF6600", fontface = "bold",size = 4,
label = paste("R2 =", round(summary(lm_prod_vs_simpseven_PA)$adj.r.squared, digits = 2), "\n",
"p =", round(unname(summary(lm_prod_vs_simpseven_PA)$coefficients[,4][2]), digits = 3))) +
theme(legend.position = "none", axis.title.x = element_blank(), axis.text.x = element_blank())
# Plot
percell_prod_vs_simpseven_plot <-
ggplot(simpseven, aes(x=mean, y=log10(fracprod_per_cell_noinf))) +
geom_errorbarh(aes(xmin = mean - sd, xmax = mean + sd, color = fraction), alpha = 0.7) + # X-axis errorbars
geom_point(size = 3.5, shape = 21, color = "black", aes(fill = fraction)) +
scale_color_manual(values = fraction_colors) +
scale_fill_manual(values = fraction_colors) +
ylab("log10(production/cell)\n (μgC/cell/hr)") +
xlab("Simpson's Evenness") +
geom_smooth(data=filter(simpseven, fraction == "Particle"), method='lm', color = "#FF6600", fill = "#FF6600") +
scale_x_continuous(limits = c(0,0.15), breaks = seq(from = 0, to = 0.15, by = 0.03)) +
annotate("text", x = 0.14, y=-6.3, color = "#FF6600", fontface = "bold",size = 4,
label = paste("R2 =", round(summary(lm_percell_prod_vs_simpseven_PA)$adj.r.squared, digits = 2), "\n",
"p =", round(unname(summary(lm_percell_prod_vs_simpseven_PA)$coefficients[,4][2]), digits = 4))) +
theme(legend.title = element_blank(), legend.position ="bottom",
legend.text = element_text(size = 14))
simpseven_plots <- plot_grid(simpseven_distribution_plot, prod_vs_simpseven_plot, percell_prod_vs_simpseven_plot,
labels = c("B", "D", "F"), ncol = 1, nrow = 3,
rel_heights = c(0.5, 0.8, 1.1),
align = "v")
## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_errorbarh).
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font metrics unknown for Unicode character U+03bc
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): conversion failure on ' (μgC/cell/hr)' in 'mbcsToSbcs': dot substituted for <ce>
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): conversion failure on ' (μgC/cell/hr)' in 'mbcsToSbcs': dot substituted for <bc>
Prepare Figure S3
plot_all_rich_percell <-
ggplot(richness, aes(x=mean, y=log10(fracprod_per_cell_noinf))) +
geom_errorbarh(aes(xmin = mean - sd, xmax = mean + sd), color = "grey", alpha = 0.7) + # X-axis errorbars
geom_point(size = 3.5, shape = 21, color = "black", fill = "grey") +
ylab("log10(production/cell)\n (μgC/cell/hr)") + xlab("Observed Richness") +
geom_smooth(method='lm', color = "#424645", fill = "#424645") +
scale_x_continuous(limits = c(150,950), breaks = seq(from = 0, to =925, by = 150)) +
scale_y_continuous(limits = c(-8.5, -5.4), breaks = seq(from = -9, to = -5, by = 1)) +
annotate("text", x = 790, y=-8, color = "#424645", fontface = "bold", size = 4,
label = paste("R2 =", round(summary(lm(log10(fracprod_per_cell_noinf) ~ mean, data = richness))$adj.r.squared, digits = 2), "\n",
"p =", round(unname(summary(lm(log10(fracprod_per_cell_noinf) ~ mean, data = richness))$coefficients[,4][2]), digits = 6))) +
theme(legend.title = element_blank(), legend.position ="bottom",
legend.text = element_text(size = 14))
plot_all_shannon_percell <-
ggplot(shannon, aes(x=mean, y=log10(fracprod_per_cell_noinf))) +
geom_errorbarh(aes(xmin = mean - sd, xmax = mean + sd), color = "grey", alpha = 0.7) + # X-axis errorbars
geom_point(size = 3.5, shape = 21, color = "black", fill = "grey") +
ylab("log10(production/cell)\n (μgC/cell/hr)") + xlab("Shannon Entropy") +
geom_smooth(method='lm', color = "#424645", fill = "#424645") +
scale_x_continuous(limits = c(3.5,6), breaks = seq(from = 3, to =6, by = 0.5)) +
scale_y_continuous(limits = c(-8.5, -5.4), breaks = seq(from = -9, to = -5, by = 1)) +
annotate("text", x = 5.25, y=-8, color = "#424645", fontface = "bold", size = 4,
label = paste("R2 =", round(summary(lm(log10(fracprod_per_cell_noinf) ~ mean, data = shannon))$adj.r.squared, digits = 2), "\n",
"p =", round(unname(summary(lm(log10(fracprod_per_cell_noinf) ~ mean, data = shannon))$coefficients[,4][2]), digits = 5))) +
theme(legend.title = element_blank(), legend.position ="bottom",
legend.text = element_text(size = 14))
plot_all_invsimps_percell <-
ggplot(invsimps, aes(x=mean, y=log10(fracprod_per_cell_noinf))) +
geom_errorbarh(aes(xmin = mean - sd, xmax = mean + sd), color = "grey", alpha = 0.7) + # X-axis errorbars
geom_point(size = 3.5, shape = 21, color = "black", fill = "grey") +
ylab("log10(production/cell)\n (μgC/cell/hr)") + xlab("Inverse Simpson") +
geom_smooth(method='lm', color = "#424645", fill = "#424645") +
scale_x_continuous(limits = c(0,85), breaks = seq(from = 0, to = 85, by = 20)) +
scale_y_continuous(limits = c(-8.5, -5.4), breaks = seq(from = -9, to = -5, by = 1)) +
annotate("text", x = 70, y=-8, color = "#424645", fontface = "bold", size = 4,
label = paste("R2 =", round(summary(lm(log10(fracprod_per_cell_noinf) ~ mean, data = invsimps))$adj.r.squared, digits = 2), "\n",
"p =", round(unname(summary(lm(log10(fracprod_per_cell_noinf) ~ mean, data = invsimps))$coefficients[,4][2]), digits = 5))) +
theme(legend.title = element_blank(), legend.position ="bottom",
legend.text = element_text(size = 14))
plot_all_simpseven_percell <-
ggplot(simpseven, aes(x=mean, y=log10(fracprod_per_cell_noinf))) +
geom_errorbarh(aes(xmin = mean - sd, xmax = mean + sd), color = "grey", alpha = 0.7) + # X-axis errorbars
geom_point(size = 3.5, shape = 21, color = "black", fill = "grey") +
ylab("log10(production/cell)\n (μgC/cell/hr)") + xlab("Inverse Simpson") +
geom_smooth(method='lm', color = "#424645", fill = "#424645") +
scale_x_continuous(limits = c(0,0.15), breaks = seq(from = 0, to = 0.15, by = 0.03)) +
scale_y_continuous(limits = c(-8.5, -5.4), breaks = seq(from = -9, to = -5, by = 1)) +
annotate("text", x = 0.12, y=-8, color = "#424645", fontface = "bold", size = 4,
label = paste("R2 =", round(summary(lm(log10(fracprod_per_cell_noinf) ~ mean, data = simpseven))$adj.r.squared, digits = 2), "\n",
"p =", round(unname(summary(lm(log10(fracprod_per_cell_noinf) ~ mean, data = simpseven))$coefficients[,4][2]), digits = 2))) +
theme(legend.title = element_blank(), legend.position ="bottom",
legend.text = element_text(size = 14))
plot_grid(plot_all_rich_percell,
plot_all_shannon_percell + theme(axis.title.y = element_blank()),
plot_all_invsimps_percell + theme(axis.title.y = element_blank()),
plot_all_simpseven_percell + theme(axis.title.y = element_blank()),
align = "h", labels = c("A", "B", "C", "D"),
rel_widths = c(1.05, 0.9, 0.9, 0.9),
nrow = 1, ncol = 4)
## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_errorbarh).
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font metrics unknown for Unicode character U+03bc
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): conversion failure on ' (μgC/cell/hr)' in 'mbcsToSbcs': dot substituted for <ce>
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): conversion failure on ' (μgC/cell/hr)' in 'mbcsToSbcs': dot substituted for <bc>
## Warning: Removed 2 rows containing non-finite values (stat_smooth).
## Warning: Removed 2 rows containing missing values (geom_errorbarh).
## Warning: Removed 2 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_errorbarh).
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_errorbarh).
## Warning: Removed 1 rows containing missing values (geom_point).

Perform Ridge and Lasso regression
Per capita Production
percell_lasso_data_df_ALL <- lasso_data_df %>%
dplyr::select(-c(fracprod_per_cell, frac_bacprod,
tot_bacprod, SD_tot_bacprod,SD_frac_bacprod,
lakesite, season, norep_filter_name, station, BGA_cellspermL, Turb_NTU)) %>%
dplyr::filter(!is.na(fracprod_per_cell_noinf)) %>%
mutate(log10_percell = log10(fracprod_per_cell_noinf)) %>%
dplyr::select(-c(fracprod_per_cell_noinf))
set.seed(777)
# Set model parameters for community level data
## NOTE: there cannot be any data with NA
x = model.matrix(log10_percell ~ ., percell_lasso_data_df_ALL)[,-1]
y = percell_lasso_data_df_ALL$log10_percell
grid = 10^seq(10,-2,length = 100)
# Pull out test and training sets for cross validation
# We will use half the set to train the model and the 2nd half of the dataset to test the model
train <- sample(1:nrow(x), nrow(x)/2)
test <- -train
y_test <- y[test]
################ RIDGE
# Run RIDGE regression with alpha = 0
ridge_divs_train <- glmnet(x[train,], y[train], alpha = 0, lambda = grid, thresh = 1e-12, standardize = TRUE)
par(mfrow = c(1,2))
plot(ridge_divs_train)
# Cross validation
cv_ridge_divs <- cv.glmnet(x[train,], y[train], alpha = 0)
## Warning: Option grouped=FALSE enforced in cv.glmnet, since < 3 observations per fold

best_ridge_lambda <- cv_ridge_divs$lambda.min
ridge_divs_pred <- predict(ridge_divs_train, s = best_ridge_lambda, newx = x[test,])
mean((ridge_divs_pred - y_test)^2) # Test MSE
## [1] 0.438887
## Run ridge on the entire dataset
ridge_divs <- glmnet(x, y, alpha = 0, lambda = grid, standardize = TRUE)
par(mfrow = c(1,1))
plot(ridge_divs)

################ LASSO
# Run lasso regression with alpha = 1
lasso_divs_train <- glmnet(x[train,], y[train], alpha = 1, lambda = grid, standardize = TRUE)
par(mfrow = c(1,2))
plot(lasso_divs_train)
# Cross validation
cv_lasso_divs <- cv.glmnet(x[train,], y[train], alpha = 1)
## Warning: Option grouped=FALSE enforced in cv.glmnet, since < 3 observations per fold

best_lasso_lambda <- cv_lasso_divs$lambda.min
lasso_divs_pred <- predict(lasso_divs_train, s = best_lasso_lambda, newx = x[test,])
mean((lasso_divs_pred - y_test)^2)
## [1] 0.3199992
## Run lasso on the entire dataset
lasso_divs <- glmnet(x, y, alpha = 1, lambda = grid, standardize = TRUE)
par(mfrow = c(1,1))
plot(lasso_divs)

# What are the lasso coefficients?
predict(lasso_divs, type = "coefficients", s = best_lasso_lambda)
## 36 x 1 sparse Matrix of class "dgCMatrix"
## 1
## (Intercept) 2.608716e+00
## fractionFree -3.928840e-01
## Sample_depth_m .
## Temp_C .
## SpCond_uSpercm .
## TDS_mgperL .
## pH -1.204696e+00
## ORP_mV .
## Chl_Lab_ugperL .
## Cl_mgperL .
## SO4_mgperL -3.818350e-05
## NO3_mgperL .
## NH3_mgperL .
## TKN_mgperL .
## SRP_ugperL .
## TP_ugperL .
## Alk_mgperL .
## DO_mgperL .
## DO_percent .
## total_bac_abund .
## attached_bac -3.387069e-07
## perc_attached_abund .
## perc_attached_bacprod .
## fraction_bac_abund .
## PC1 .
## PC2 .
## PC3 .
## PC4 .
## PC5 -2.169245e-02
## PC6 .
## Richness 1.026611e-03
## Shannon_Entropy .
## Inverse_Simpson .
## Simpsons_Evenness .
## unweighted_div -1.874834e-02
## weighted_div .
Figure 3: Unweighted ses.mpd
######################################################### SES MPD DISTRIBUTION: UNWEIGHTED
unweighted_fraction_wilcox <- wilcox.test(mpd.obs.z ~ fraction, data = unweighted_df)
unweighted_fraction_wilcox
##
## Wilcoxon rank sum test
##
## data: mpd.obs.z by fraction
## W = 30, p-value = 0.01449
## alternative hypothesis: true location shift is not equal to 0
unweighted_df %>%
group_by(fraction) %>%
summarize(mean(mpd.obs.z))
## # A tibble: 2 x 2
## fraction `mean(mpd.obs.z)`
## <fctr> <dbl>
## 1 Particle -0.4970703
## 2 Free 0.4375166
# Make a data frame to draw significance line in boxplot (visually calculated)
dat4 <- data.frame(a = c(1.15,1.15,1.85,1.85), b = c(1.6,1.7,1.7,1.6)) # WholePart vs WholeFree
unweight_distribution_plot <-
ggplot(unweighted_df, aes(y = mpd.obs.z, x = fraction)) +
#scale_color_manual(values = fraction_colors) +
scale_fill_manual(values = fraction_colors) +
geom_jitter(size = 3, shape = 21, aes(fill = fraction), width = 0.2) +
geom_boxplot(alpha = 0.5, outlier.shape = NA, aes(fill = fraction)) +
scale_y_continuous(limits = c(-1.5,1.75), breaks = seq(from = -1.5, to = 1.5, by = 0.5)) +
ylab("Standardized Effect Size \n Unweighted Mean Pairwise Dist") +
xlab("Fraction") +
geom_hline(yintercept = 0, linetype = "dashed", size = 1.5) +
##### Particle vs free per-cell production
geom_path(data = dat4, aes(x = a, y = b), linetype = 1, color = "#424645") +
annotate("text", x=1.5, y=1.35, fontface = "bold", size = 4, color = "#424645",
label= paste("***\np =", round(unweighted_fraction_wilcox$p.value, digits = 2))) +
theme(legend.position = "none",# axis.title.y = element_blank(),
axis.title.x = element_blank(), axis.text.x = element_blank()) +
coord_flip()
# Is there a relationship between richness and Unweighted Mean Pairwise distance?
summary(lm(mean ~ mpd.obs.z, data = unweighted_df))
##
## Call:
## lm(formula = mean ~ mpd.obs.z, data = unweighted_df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -195.66 -96.12 -14.17 80.15 295.24
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 443.89 28.29 15.690 1.98e-13 ***
## mpd.obs.z -124.90 34.37 -3.634 0.00147 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 138.5 on 22 degrees of freedom
## Multiple R-squared: 0.3751, Adjusted R-squared: 0.3467
## F-statistic: 13.2 on 1 and 22 DF, p-value: 0.001467
summary(lm(mean ~ mpd.obs.z, data = filter(unweighted_df, fraction == "Particle")))
##
## Call:
## lm(formula = mean ~ mpd.obs.z, data = filter(unweighted_df, fraction ==
## "Particle"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -191.75 -112.16 -41.91 55.88 278.93
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 515.16 51.16 10.069 1.49e-06 ***
## mpd.obs.z -83.76 49.91 -1.678 0.124
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 155 on 10 degrees of freedom
## Multiple R-squared: 0.2198, Adjusted R-squared: 0.1417
## F-statistic: 2.816 on 1 and 10 DF, p-value: 0.1242
summary(lm(mean ~ mpd.obs.z, data = filter(unweighted_df, fraction == "Free")))
##
## Call:
## lm(formula = mean ~ mpd.obs.z, data = filter(unweighted_df, fraction ==
## "Free"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -100.15 -66.22 -18.72 43.66 172.93
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 337.075 42.750 7.885 1.34e-05 ***
## mpd.obs.z 3.083 77.507 0.040 0.969
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 90.18 on 10 degrees of freedom
## Multiple R-squared: 0.0001582, Adjusted R-squared: -0.09983
## F-statistic: 0.001583 on 1 and 10 DF, p-value: 0.9691
unweight_rich_vs_mpd_plot <-
ggplot(unweighted_df, aes(y = mean, x = mpd.obs.z)) +
geom_errorbar(aes(ymin = mean - sd, ymax = mean + sd, color = fraction), alpha = 0.7) + # X-axis errorbars
geom_vline(xintercept = 0, linetype = "dashed", size = 1.5) +
geom_point(size = 3, shape = 21, aes(fill = fraction)) + ylab("Observed Richness") +
xlab("Standardized Effect Size \n Unweighted Mean Pairwise Dist") +
scale_fill_manual(values = fraction_colors) +
geom_smooth(method = "lm", color = "#424645", fill = "#424645", alpha = 0.3) +
scale_x_continuous(limits = c(-1.5,1.75), breaks = seq(from = -1.5, to = 1.5, by = 0.5)) +
annotate("text", x = 0.75, y=750, color = "#424645", fontface = "bold",
label = paste("R2 =", round(summary(lm(mean ~ mpd.obs.z, data = unweighted_df))$adj.r.squared, digits = 2), "\n",
"p =", round(unname(summary(lm(mean ~ mpd.obs.z, data = unweighted_df))$coefficients[,4][2]), digits = 3))) +
theme(legend.title = element_blank(), legend.position = "none",
axis.text.x = element_blank(), axis.title.x = element_blank())
# Is there a relationship between Production and Unweighted Mean Pairwise distance?
#summary(lm(frac_bacprod ~ mpd.obs.z, data = unweighted_df)) # NS
summary(lm(frac_bacprod ~ mpd.obs.z, data = filter(unweighted_df, fraction == "Particle")))
##
## Call:
## lm(formula = frac_bacprod ~ mpd.obs.z, data = filter(unweighted_df,
## fraction == "Particle"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -9.144 -4.312 -1.822 3.403 19.527
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 8.213 2.617 3.139 0.0105 *
## mpd.obs.z -3.510 2.553 -1.375 0.1991
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7.928 on 10 degrees of freedom
## Multiple R-squared: 0.159, Adjusted R-squared: 0.07492
## F-statistic: 1.891 on 1 and 10 DF, p-value: 0.1991
summary(lm(frac_bacprod ~ mpd.obs.z, data = filter(unweighted_df, fraction == "Free")))
##
## Call:
## lm(formula = frac_bacprod ~ mpd.obs.z, data = filter(unweighted_df,
## fraction == "Free"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -22.003 -14.934 2.196 8.756 36.997
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 18.183 8.396 2.166 0.0556 .
## mpd.obs.z 13.428 15.223 0.882 0.3984
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 17.71 on 10 degrees of freedom
## Multiple R-squared: 0.07219, Adjusted R-squared: -0.02059
## F-statistic: 0.7781 on 1 and 10 DF, p-value: 0.3984
unweight_prod_vs_mpd_plot <-
ggplot(unweighted_df, aes(y = frac_bacprod, x = mpd.obs.z, fill = fraction)) +
geom_errorbar(aes(ymin = frac_bacprod - SD_frac_bacprod, ymax = frac_bacprod + SD_frac_bacprod, color = fraction), alpha = 0.7) + # X-axis errorbars
geom_vline(xintercept = 0, linetype = "dashed", size = 1.5) +
geom_point(size = 3, shape = 21) +
ylab("Heterotrophic Production \n (μgC/L/hr)") +
xlab("Standardized Effect Size \n Unweighted Mean Pairwise Dist") +
scale_fill_manual(values = fraction_colors) +
scale_x_continuous(limits = c(-1.5,1.75), breaks = seq(from = -1.5, to = 1.5, by = 0.5)) +
theme(legend.title = element_blank(), legend.position = "none",
axis.text.x = element_blank(), axis.title.x = element_blank())
# Is there a relationship between PER-CELL PRODUCTION and Unweighted Mean Pairwise distance?
unweight_vs_percell <- lm(log10(fracprod_per_cell_noinf) ~ mpd.obs.z, data = unweighted_df)
summary(unweight_vs_percell)
##
## Call:
## lm(formula = log10(fracprod_per_cell_noinf) ~ mpd.obs.z, data = unweighted_df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.7226 -0.2897 0.0040 0.1294 1.3193
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -7.1988 0.0999 -72.057 < 2e-16 ***
## mpd.obs.z -0.4972 0.1205 -4.127 0.00048 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4779 on 21 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.4478, Adjusted R-squared: 0.4215
## F-statistic: 17.03 on 1 and 21 DF, p-value: 0.0004795
summary(lm(log10(fracprod_per_cell_noinf) ~ mpd.obs.z, data = filter(unweighted_df, fraction == "Particle")))
##
## Call:
## lm(formula = log10(fracprod_per_cell_noinf) ~ mpd.obs.z, data = filter(unweighted_df,
## fraction == "Particle"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.37972 -0.24190 -0.16526 0.04437 1.18153
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -6.9247 0.1711 -40.472 1.71e-11 ***
## mpd.obs.z -0.3298 0.1627 -2.027 0.0733 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4649 on 9 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.3135, Adjusted R-squared: 0.2372
## F-statistic: 4.109 on 1 and 9 DF, p-value: 0.07327
summary(lm(log10(fracprod_per_cell_noinf) ~ mpd.obs.z, data = filter(unweighted_df, fraction == "Free")))
##
## Call:
## lm(formula = log10(fracprod_per_cell_noinf) ~ mpd.obs.z, data = filter(unweighted_df,
## fraction == "Free"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.63981 -0.29782 0.07682 0.17625 0.76499
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -7.55633 0.19602 -38.55 3.29e-12 ***
## mpd.obs.z -0.04279 0.35539 -0.12 0.907
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4135 on 10 degrees of freedom
## Multiple R-squared: 0.001447, Adjusted R-squared: -0.09841
## F-statistic: 0.01449 on 1 and 10 DF, p-value: 0.9066
unweight_percell_vs_mpd_plot <-
ggplot(unweighted_df,
aes(y = log10(fracprod_per_cell_noinf), x = mpd.obs.z)) +
geom_vline(xintercept = 0, linetype = "dashed", size = 1.5) +
geom_point(size = 3, shape = 21, aes(fill = fraction)) +
ylab("log10(Production/cell)\n (μgC/cell/hr)") +
xlab("Standardized Effect Size \n Unweighted Mean Pairwise Dist") +
scale_fill_manual(values = fraction_colors) +
geom_smooth(method = "lm", color="#424645", fill = "#424645", alpha = 0.3) +
#stat_smooth(method="lm", se=TRUE, formula= y ~ poly(x, 2, raw=TRUE), color="#424645", fill = "#424645", alpha = 0.3) +
scale_y_continuous(limits = c(-8.5,-5), breaks = c(-8, -7, -6)) +
scale_x_continuous(limits = c(-1.5,1.75), breaks = seq(from = -1.5, to = 1.5, by = 0.5)) +
annotate("text", x = 0.75, y=-6, color = "#424645", fontface = "bold",
label = paste("R2 =", round(summary(unweight_vs_percell)$adj.r.squared, digits = 2), "\n",
"p =", round(unname(summary(unweight_vs_percell)$coefficients[,4][2]), digits = 4))) +
theme(legend.title = element_blank(), legend.position ="bottom",
legend.text = element_text(size = 14))
plot_grid(unweight_distribution_plot, unweight_rich_vs_mpd_plot, unweight_prod_vs_mpd_plot, unweight_percell_vs_mpd_plot,
labels = c("A", "B", "C", "D"), ncol = 1, nrow = 4,
rel_heights = c(0.5, 0.8, 0.8, 1.2),
align = "v")
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font metrics unknown for Unicode character U+03bc
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): conversion failure on ' (μgC/L/hr)' in 'mbcsToSbcs': dot substituted for <ce>
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): conversion failure on ' (μgC/L/hr)' in 'mbcsToSbcs': dot substituted for <bc>
## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font metrics unknown for Unicode character U+03bc
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): conversion failure on ' (μgC/cell/hr)' in 'mbcsToSbcs': dot substituted for <ce>
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): conversion failure on ' (μgC/cell/hr)' in 'mbcsToSbcs': dot substituted for <bc>

Figure S6: Weighted ses.mpd
######################################################### SES MPD DISTRIBUTION: WEIGHTED
weighted_fraction_wilcox <- wilcox.test(mpd.obs.z ~ fraction, data = weighted_df)
weighted_fraction_wilcox
##
## Wilcoxon rank sum test
##
## data: mpd.obs.z by fraction
## W = 81, p-value = 0.6297
## alternative hypothesis: true location shift is not equal to 0
filter(weighted_df) %>%
group_by(fraction) %>%
summarize(mean(mpd.obs.z))
## # A tibble: 2 x 2
## fraction `mean(mpd.obs.z)`
## <fctr> <dbl>
## 1 Particle -0.3607944
## 2 Free -0.3854885
# Make a data frame to draw significance line in boxplot (visually calculated)
dat5 <- data.frame(a = c(1.15,1.15,1.85,1.85), b = c(1.6,1.7,1.7,1.6)) # WholePart vs WholeFree
weight_distribution_plot <-
ggplot(weighted_df, aes(y = mpd.obs.z, x = fraction)) +
#scale_color_manual(values = fraction_colors) +
scale_fill_manual(values = fraction_colors) +
geom_jitter(size = 3, shape = 21, aes(fill = fraction), width = 0.2) +
geom_boxplot(alpha = 0.5, outlier.shape = NA, aes(fill = fraction)) +
scale_y_continuous(limits = c(-1.5,1.75), breaks = seq(from = -1.5, to = 1.5, by = 0.5)) +
ylab("Standardized Effect Size \n Unweighted Mean Pairwise Dist") +
xlab("Fraction") +
geom_hline(yintercept = 0, linetype = "dashed", size = 1.5) +
##### Particle vs free per-cell production
geom_path(data = dat5, aes(x = a, y = b), linetype = 1, color = "#424645") +
annotate("text", x=1.5, y=1.55, fontface = "bold", size = 3.5, color = "#424645", label= "NS") +
theme(legend.position = "none", #axis.title.y = element_blank(),
axis.title.x = element_blank(), axis.text.x = element_blank()) +
coord_flip()
# Is there a relationship between inverse simpson and Weighted Mean Pairwise distance?
#summary(lm(mean ~ mpd.obs.z, data = weighted_df)) # NS
summary(lm(mean ~ mpd.obs.z, data = filter(weighted_df, fraction == "Particle")))
##
## Call:
## lm(formula = mean ~ mpd.obs.z, data = filter(weighted_df, fraction ==
## "Particle"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -28.42 -19.15 0.40 12.95 40.67
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 23.58 12.61 1.870 0.0911 .
## mpd.obs.z -32.98 29.43 -1.121 0.2886
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 23.57 on 10 degrees of freedom
## Multiple R-squared: 0.1116, Adjusted R-squared: 0.02276
## F-statistic: 1.256 on 1 and 10 DF, p-value: 0.2886
summary(lm(mean ~ mpd.obs.z, data = filter(weighted_df, fraction == "Free")))
##
## Call:
## lm(formula = mean ~ mpd.obs.z, data = filter(weighted_df, fraction ==
## "Free"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -12.0597 -4.3465 -0.8155 3.8832 18.4294
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 22.187 4.720 4.701 0.00084 ***
## mpd.obs.z -4.942 10.486 -0.471 0.64753
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 8.441 on 10 degrees of freedom
## Multiple R-squared: 0.02173, Adjusted R-squared: -0.0761
## F-statistic: 0.2221 on 1 and 10 DF, p-value: 0.6475
weight_invsimps_vs_mpd_plot <-
ggplot(weighted_df, aes(y = mean, x = mpd.obs.z, fill = fraction)) +
geom_errorbar(aes(ymin = mean - sd, ymax = mean + sd, color = fraction), alpha = 0.7) + # X-axis errorbars
geom_vline(xintercept = 0, linetype = "dashed", size = 1.5) +
geom_point(size = 3, shape = 21) + ylab("Inverse Simpson") +
#xlab("Standardized Effect Size \n Weighted Mean Pairwise Dist") +
scale_fill_manual(values = fraction_colors) +
scale_x_continuous(limits = c(-1.5,1.75), breaks = seq(from = -1.5, to = 1.5, by = 0.5)) +
theme(legend.title = element_blank(), legend.position = "none",
axis.text.x = element_blank(), axis.title.x = element_blank())
# Is there a relationship between PER-LITER PRODUCTION and WEIGHTED Mean Pairwise distance?
summary(lm(frac_bacprod ~ mpd.obs.z, data = weighted_df))
##
## Call:
## lm(formula = frac_bacprod ~ mpd.obs.z, data = weighted_df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -18.030 -10.236 -2.842 5.937 38.409
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 9.008 5.664 1.590 0.126
## mpd.obs.z -21.439 12.889 -1.663 0.110
##
## Residual standard error: 14.66 on 22 degrees of freedom
## Multiple R-squared: 0.1117, Adjusted R-squared: 0.07133
## F-statistic: 2.767 on 1 and 22 DF, p-value: 0.1104
summary(lm(frac_bacprod ~ mpd.obs.z, data = filter(weighted_df, fraction == "Particle")))
##
## Call:
## lm(formula = frac_bacprod ~ mpd.obs.z, data = filter(weighted_df,
## fraction == "Particle"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -10.240 -4.575 -1.717 4.503 15.352
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.391 4.126 1.064 0.312
## mpd.obs.z -15.432 9.627 -1.603 0.140
##
## Residual standard error: 7.711 on 10 degrees of freedom
## Multiple R-squared: 0.2044, Adjusted R-squared: 0.1249
## F-statistic: 2.569 on 1 and 10 DF, p-value: 0.14
summary(lm(frac_bacprod ~ mpd.obs.z, data = filter(weighted_df, fraction == "Free")))
##
## Call:
## lm(formula = frac_bacprod ~ mpd.obs.z, data = filter(weighted_df,
## fraction == "Free"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -19.790 -12.275 -3.481 7.360 30.573
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 14.697 9.683 1.518 0.160
## mpd.obs.z -24.285 21.512 -1.129 0.285
##
## Residual standard error: 17.32 on 10 degrees of freedom
## Multiple R-squared: 0.113, Adjusted R-squared: 0.02434
## F-statistic: 1.274 on 1 and 10 DF, p-value: 0.2853
weight_prod_vs_mpd_plot <-
ggplot(weighted_df, aes(y = frac_bacprod, x = mpd.obs.z, fill = fraction)) +
geom_errorbar(aes(ymin = frac_bacprod - SD_frac_bacprod, ymax = frac_bacprod + SD_frac_bacprod, color = fraction), alpha = 0.7) + # X-axis errorbars
geom_vline(xintercept = 0, linetype = "dashed", size = 1.5) +
geom_point(size = 3, shape = 21) +
ylab("Heterotrophic Production \n (μgC/L/hr)") +
xlab("Standardized Effect Size \n Weighted Mean Pairwise Dist") +
scale_fill_manual(values = fraction_colors) +
scale_x_continuous(limits = c(-1.5,1.75), breaks = seq(from = -1.5, to = 1.5, by = 0.5)) +
theme(legend.title = element_blank(), legend.position = "none",
axis.text.x = element_blank(), axis.title.x = element_blank())
# Is there a relationship between PER-CELL PRODUCTION and WEIGHTED Mean Pairwise distance?
summary(lm(log10(fracprod_per_cell_noinf) ~ mpd.obs.z, data = weighted_df))
##
## Call:
## lm(formula = log10(fracprod_per_cell_noinf) ~ mpd.obs.z, data = weighted_df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.94765 -0.40616 -0.03619 0.31885 1.39101
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -7.5352 0.2442 -30.860 <2e-16 ***
## mpd.obs.z -0.9519 0.5446 -1.748 0.0951 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.6009 on 21 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.127, Adjusted R-squared: 0.08544
## F-statistic: 3.055 on 1 and 21 DF, p-value: 0.09508
summary(lm(log10(fracprod_per_cell_noinf) ~ mpd.obs.z, data = filter(weighted_df, fraction == "Particle")))
##
## Call:
## lm(formula = log10(fracprod_per_cell_noinf) ~ mpd.obs.z, data = filter(weighted_df,
## fraction == "Particle"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.65329 -0.32632 -0.03486 0.22224 0.95307
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -7.0845 0.3015 -23.496 2.18e-09 ***
## mpd.obs.z -0.9337 0.6753 -1.383 0.2
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5096 on 9 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.1752, Adjusted R-squared: 0.08357
## F-statistic: 1.912 on 1 and 9 DF, p-value: 0.2001
lm_percell_free_mpd <- lm(log10(fracprod_per_cell_noinf) ~ mpd.obs.z, data = filter(weighted_df, fraction == "Free"))
summary(lm_percell_free_mpd)
##
## Call:
## lm(formula = log10(fracprod_per_cell_noinf) ~ mpd.obs.z, data = filter(weighted_df,
## fraction == "Free"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.54189 -0.16159 -0.00204 0.20070 0.44618
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -7.9519 0.1848 -43.019 1.11e-12 ***
## mpd.obs.z -0.9777 0.4107 -2.381 0.0386 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3306 on 10 degrees of freedom
## Multiple R-squared: 0.3617, Adjusted R-squared: 0.2979
## F-statistic: 5.667 on 1 and 10 DF, p-value: 0.03857
weight_percell_vs_mpd_plot <-
ggplot(weighted_df,
aes(y = log10(fracprod_per_cell_noinf), x = mpd.obs.z, fill = fraction)) +
geom_vline(xintercept = 0, linetype = "dashed", size = 1.5) +
geom_point(size = 3, shape = 21) +
ylab("log10(Production/cell)\n (μgC/cell/hr)") +
xlab("Standardized Effect Size \n Weighted Mean Pairwise Dist") +
scale_fill_manual(values = fraction_colors) +
geom_smooth(method = "lm", data = filter(weighted_df, fraction == "Free"), color = "skyblue", fill = "skyblue", alpha = 0.3) +
scale_x_continuous(limits = c(-1.5,1.75), breaks = seq(from = -1.5, to = 1.5, by = 0.5)) +
scale_y_continuous(limits = c(-8.5,-5), breaks = c(-8, -7, -6)) +
annotate("text", x = 0.3, y=-7.9, color = "skyblue", fontface = "bold",
label = paste("R2 =", round(summary(lm_percell_free_mpd)$adj.r.squared, digits = 3), "\n",
"p =", round(unname(summary(lm_percell_free_mpd)$coefficients[,4][2]), digits = 3))) +
theme(legend.title = element_blank(), legend.position ="bottom",
legend.text = element_text(size = 14))
plot_grid(weight_distribution_plot, weight_invsimps_vs_mpd_plot, weight_prod_vs_mpd_plot, weight_percell_vs_mpd_plot,
labels = c("A", "B", "C", "D"), ncol = 1, nrow = 4,
rel_heights = c(0.5, 0.8, 0.8, 1.2),
align = "v")
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font metrics unknown for Unicode character U+03bc
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): conversion failure on ' (μgC/L/hr)' in 'mbcsToSbcs': dot substituted for <ce>
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): conversion failure on ' (μgC/L/hr)' in 'mbcsToSbcs': dot substituted for <bc>
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font metrics unknown for Unicode character U+03bc
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): conversion failure on ' (μgC/cell/hr)' in 'mbcsToSbcs': dot substituted for <ce>
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): conversion failure on ' (μgC/cell/hr)' in 'mbcsToSbcs': dot substituted for <bc>

Cell counts and Unweighted SESmpd
# Combine the datasets into
cell_nums <- otu_alphadiv %>%
dplyr::select(norep_filter_name, fraction_bac_abund) %>%
distinct()
unweight_cellnums <- cell_nums %>%
left_join(unweighted_df, by = "norep_filter_name") %>%
dplyr::filter(norep_filter_name != "MOTEJ515")
## Warning in left_join_impl(x, y, by$x, by$y, suffix$x, suffix$y): joining character vector and factor, coercing into character vector
# Is there a relationship between cell numbers and Unweighted Mean Pairwise distance?
summary(lm(log10(fraction_bac_abund) ~ mpd.obs.z, data = filter(unweight_cellnums)))
##
## Call:
## lm(formula = log10(fraction_bac_abund) ~ mpd.obs.z, data = filter(unweight_cellnums))
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.4107 -0.1703 0.1661 0.3388 0.7078
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.2637 0.1119 47.041 < 2e-16 ***
## mpd.obs.z 0.5256 0.1349 3.895 0.000835 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5352 on 21 degrees of freedom
## Multiple R-squared: 0.4194, Adjusted R-squared: 0.3917
## F-statistic: 15.17 on 1 and 21 DF, p-value: 0.0008354
summary(lm(log10(fraction_bac_abund) ~ mpd.obs.z, data = filter(unweight_cellnums, fraction == "Particle")))
##
## Call:
## lm(formula = log10(fraction_bac_abund) ~ mpd.obs.z, data = filter(unweight_cellnums,
## fraction == "Particle"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.60108 -0.11615 0.06289 0.23310 0.34729
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.61321 0.11608 39.742 2.01e-11 ***
## mpd.obs.z 0.06373 0.11039 0.577 0.578
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3154 on 9 degrees of freedom
## Multiple R-squared: 0.03572, Adjusted R-squared: -0.07143
## F-statistic: 0.3334 on 1 and 9 DF, p-value: 0.5778
summary(lm(log10(fraction_bac_abund) ~ mpd.obs.z, data = filter(unweight_cellnums, fraction == "Free")))
##
## Call:
## lm(formula = log10(fraction_bac_abund) ~ mpd.obs.z, data = filter(unweight_cellnums,
## fraction == "Free"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.294515 -0.070928 -0.008011 0.116064 0.216553
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.76213 0.08022 71.829 6.67e-15 ***
## mpd.obs.z 0.16564 0.14544 1.139 0.281
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1692 on 10 degrees of freedom
## Multiple R-squared: 0.1148, Adjusted R-squared: 0.02629
## F-statistic: 1.297 on 1 and 10 DF, p-value: 0.2813
ggplot(unweight_cellnums, aes(y = log10(fraction_bac_abund), x = mpd.obs.z)) +
geom_vline(xintercept = 0, linetype = "dashed", size = 1.5) +
geom_point(size = 3, shape = 21, aes(fill = fraction)) +
ylab("log10(bacterial cells/mL)") +
xlab("Standardized Effect Size \n Unweighted Mean Pairwise Dist") +
scale_fill_manual(values = fraction_colors) +
geom_smooth(method = "lm", color = "#424645", fill = "#424645", alpha = 0.3) +
scale_x_continuous(limits = c(-1.5,1.75), breaks = seq(from = -1.5, to = 1.5, by = 0.5)) +
annotate("text", x = 0.75, y=4.25, color = "#424645", fontface = "bold",
label = paste("R2 =", round(summary(lm(log10(fraction_bac_abund) ~ mpd.obs.z, data = unweight_cellnums))$adj.r.squared, digits = 2), "\n",
"p =", round(unname(summary(lm(log10(fraction_bac_abund) ~ mpd.obs.z, data = unweight_cellnums))$coefficients[,4][2]), digits = 3))) +
theme(legend.title = element_blank(), legend.position = c(0.12, 0.9))

Figure S7
# Test by station
part_unweighted_df <- filter(unweighted_df, fraction == "Particle")
kruskal.test(mpd.obs.z ~ lakesite, data = filter(unweighted_df, fraction == "Particle"))
##
## Kruskal-Wallis rank sum test
##
## data: mpd.obs.z by lakesite
## Kruskal-Wallis chi-squared = 5.8718, df = 3, p-value = 0.118
unweighted_df %>%
filter(fraction == "Particle") %>%
group_by(lakesite) %>%
summarize(mean(mpd.obs.z), sd(mpd.obs.z))
## # A tibble: 4 x 3
## lakesite `mean(mpd.obs.z)` `sd(mpd.obs.z)`
## <fctr> <dbl> <dbl>
## 1 Outlet 0.7950222 0.5856743
## 2 Deep -0.9216060 0.3564753
## 3 Bear -0.7817952 0.9425671
## 4 River -1.0799021 0.2412376
# Lakesite
plot_part_unweight_lakesite <- ggplot(filter(unweighted_df, fraction == "Particle"),
aes(y = mpd.obs.z, x = lakesite)) +
ggtitle("Particle-Associated Samples Only") +
scale_fill_manual(values = lakesite_colors) +
ylab("Standardized Effect Size \n Unweighted Mean Pairwise Dist") +
geom_jitter(size = 3, shape = 21, aes(fill = lakesite), width = 0.2) +
geom_boxplot(aes(fill = lakesite), alpha = 0.5) +
theme(axis.title.x = element_blank(),
legend.position = c(0.85, 0.9), legend.title = element_blank())
# Season
plot_part_unweight_season <- ggplot(filter(unweighted_df, fraction == "Particle"),
aes(y = mpd.obs.z, x = season)) +
ggtitle("Particle-Associated Samples Only") +
ylab("Standardized Effect Size \n Unweighted Mean Pairwise Dist") +
scale_fill_manual(values = season_colors) +
geom_jitter(size = 3, shape = 21, aes(fill = season), width = 0.2) +
geom_boxplot(aes(fill = season), alpha = 0.5) +
theme(axis.title.x = element_blank(), axis.title.y = element_blank(),
legend.position = c(0.9, 0.9), legend.title = element_blank())
plot_grid(plot_part_unweight_lakesite, plot_part_unweight_season,
labels = c("A", "B"), ncol = 2, nrow = 1, align = "h")

plot_station_rich <-
ggplot(filter(otu_alphadiv, measure == "Richness" & fraction == "Particle"),
aes(x = lakesite, y = mean, fill = lakesite)) +
geom_jitter(size = 3, shape = 21) +
ylab("Observed Richness") +
ggtitle("Particle Fraction Only") +
geom_boxplot(outlier.shape = NA, color = "black", alpha = 0.5) +
scale_fill_manual(values = lakesite_colors) +
theme(legend.title = element_blank(),
legend.position = c(0.12, 0.9))
otu_alphadiv %>%
filter(measure == "Richness" & fraction == "Particle") %>%
group_by(lakesite) %>%
summarize(mean(mean), sd(mean))
## # A tibble: 4 x 3
## lakesite `mean(mean)` `sd(mean)`
## <fctr> <dbl> <dbl>
## 1 Outlet 430.8567 81.99728
## 2 Deep 472.0833 23.48010
## 3 Bear 637.9933 242.53768
## 4 River 686.2633 135.20325
plot_station_invsimps <-
ggplot(filter(otu_alphadiv, measure == "Inverse_Simpson" & fraction == "Particle"),
aes(x = lakesite, y = mean, fill = lakesite)) +
geom_jitter(size = 3, shape = 21) +
ggtitle("Particle Fraction Only") +
ylab("Inverse Simpson") +
geom_boxplot(outlier.shape = NA, color = "black", alpha = 0.5) +
scale_fill_manual(values = lakesite_colors) +
theme(legend.title = element_blank(),
legend.position = "none")
otu_alphadiv %>%
filter(measure == "Inverse_Simpson" & fraction == "Particle") %>%
group_by(lakesite) %>%
summarize(mean(mean))
## # A tibble: 4 x 2
## lakesite `mean(mean)`
## <fctr> <dbl>
## 1 Outlet 23.66717
## 2 Deep 23.76370
## 3 Bear 35.36997
## 4 River 59.10553
plot_grid(plot_station_rich, plot_station_invsimps,
labels = c("A", "B"), ncol = 2, nrow = 1, align = "h")

#### FREE LIVING
plot_station_rich_FL <-
ggplot(filter(otu_alphadiv, measure == "Richness" & fraction == "Free"),
aes(x = lakesite, y = mean, fill = lakesite)) +
geom_jitter(size = 3, shape = 21) +
ylab("Observed Richness") +
ggtitle("Free-Living Fraction Only") +
geom_boxplot(outlier.shape = NA, color = "black", alpha = 0.5) +
scale_fill_manual(values = lakesite_colors) +
theme(legend.title = element_blank(),
legend.position = c(0.12, 0.9))
otu_alphadiv %>%
filter(measure == "Richness" & fraction == "Free") %>%
group_by(lakesite) %>%
summarize(mean(mean), sd(mean))
## # A tibble: 4 x 3
## lakesite `mean(mean)` `sd(mean)`
## <fctr> <dbl> <dbl>
## 1 Outlet 297.5467 74.24756
## 2 Deep 313.9267 58.54588
## 3 Bear 293.9033 55.85800
## 4 River 448.3200 64.10530
plot_station_invsimps_FL <-
ggplot(filter(otu_alphadiv, measure == "Inverse_Simpson" & fraction == "Free"),
aes(x = lakesite, y = mean, fill = lakesite)) +
geom_jitter(size = 3, shape = 21) +
ggtitle("Free-Living Fraction Only") +
ylab("Inverse Simpson") +
geom_boxplot(outlier.shape = NA, color = "black", alpha = 0.5) +
scale_fill_manual(values = lakesite_colors) +
theme(legend.title = element_blank(),
legend.position = "none")
otu_alphadiv %>%
filter(measure == "Inverse_Simpson" & fraction == "Free") %>%
group_by(lakesite) %>%
summarize(mean(mean))
## # A tibble: 4 x 2
## lakesite `mean(mean)`
## <fctr> <dbl>
## 1 Outlet 23.87979
## 2 Deep 22.42163
## 3 Bear 19.06537
## 4 River 31.00198
plot_grid(plot_station_rich_FL, plot_station_invsimps_FL,
labels = c("A", "B"), ncol = 2, nrow = 1, align = "h")

Plot Figure S5: Randomized Richness
# Read in the tree
randomized_tree <- read.tree(file = "data/PhyloTree/randomized/newick_tree_randomized_rmN.tre")
#random_physeq_presab_pruned_tree <- merge_phyloseq(random_physeq_presab_pruned, randomized_tree)
#random_physeq_presab_pruned_tree
#save(list="random_physeq_presab_pruned_tree", file=paste0("data/PhyloTree/randomized/random_physeq_presab_pruned_tree.RData"))
load("data/PhyloTree/randomized/random_physeq_presab_pruned_tree.RData")
random_physeq_presab_pruned_tree
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 2911 taxa and 24 samples ]
## sample_data() Sample Data: [ 24 samples by 44 sample variables ]
## tax_table() Taxonomy Table: [ 2911 taxa by 8 taxonomic ranks ]
## phy_tree() Phylogenetic Tree: [ 2911 tips and 2909 internal nodes ]
# First force the OTU
randomized_otu <- matrix(otu_table(random_physeq_presab_pruned_tree), nrow = nrow(otu_table(random_physeq_presab_pruned_tree)))
rownames(randomized_otu) <- sample_names(random_physeq_presab_pruned_tree)
colnames(randomized_otu) <- taxa_names(random_physeq_presab_pruned_tree)
## Calculate input for SES_MPD
# Convert the presence/absence data to standardized abundanced vegan function `decostand' , NOTE: method = "pa"
otu_decostand <- decostand(randomized_otu, method = "pa")
# check total abundance in each sample
apply(otu_decostand, 1, sum)
## MBREJ515 MBREJ715 MBREJ915 MBREK515 MBREK715 MBREK915 MDPEJ515 MDPEJ715 MDPEJ915 MDPEK515 MDPEK715 MDPEK915 MINEJ515 MINEJ715 MINEJ915 MINEK515 MINEK715 MINEK915 MOTEJ515 MOTEJ715 MOTEJ915 MOTEK515 MOTEK715 MOTEK915
## 906 574 434 268 256 358 493 447 476 276 284 381 840 632 586 452 383 511 505 343 444 274 238 381
# check for mismatches/missing species between community data and phylo tree
randomized_matches <- match.phylo.comm(randomized_tree, otu_decostand)
# the resulting object is a list with $phy and $comm elements. replace our
# original data with the sorted/matched data
phy_randomized_rm2 <- randomized_matches$phy
comm_randomized_rm2 <- randomized_matches$comm
# Calculate the phylogenetic distances
phy_dist_randomized_rm2 <- cophenetic(phy_randomized_rm2)
## Calculate SES_MPD
###################################### INDEPENDENT SWAP ############################################
# calculate standardized effect size mean pairwise distance (ses.mpd)
unweighted_sesMPD_indepswap_randomized <- ses.mpd(comm_randomized_rm2, phy_dist_randomized_rm2, null.model = "independentswap",
abundance.weighted = FALSE, runs = 999)
df <- unweighted_sesMPD_indepswap_randomized
df$names <- row.names(df)
df_2 <- make_metadata_norep(df) %>%
mutate(fraction = fct_recode(fraction, Particle = "WholePart", Free = "WholeFree"))
# Is there a relationship between richness and Unweighted Mean Pairwise distance?
summary(lm(ntaxa ~ mpd.obs.z, data = df_2))
##
## Call:
## lm(formula = ntaxa ~ mpd.obs.z, data = df_2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -212.74 -129.44 -12.54 53.58 468.91
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 448.27 35.48 12.633 1.47e-11 ***
## mpd.obs.z 56.32 94.24 0.598 0.556
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 173.7 on 22 degrees of freedom
## Multiple R-squared: 0.01598, Adjusted R-squared: -0.02875
## F-statistic: 0.3572 on 1 and 22 DF, p-value: 0.5562
summary(lm(ntaxa ~ mpd.obs.z, data = filter(df_2, fraction == "Particle")))
##
## Call:
## lm(formula = ntaxa ~ mpd.obs.z, data = filter(df_2, fraction ==
## "Particle"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -203.87 -109.15 -44.96 44.29 341.78
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 557.62 50.67 11.006 6.56e-07 ***
## mpd.obs.z -33.22 140.62 -0.236 0.818
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 175 on 10 degrees of freedom
## Multiple R-squared: 0.00555, Adjusted R-squared: -0.0939
## F-statistic: 0.05581 on 1 and 10 DF, p-value: 0.818
summary(lm(ntaxa ~ mpd.obs.z, data = filter(df_2, fraction == "Free")))
##
## Call:
## lm(formula = ntaxa ~ mpd.obs.z, data = filter(df_2, fraction ==
## "Free"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -107.76 -48.80 -17.85 56.29 159.66
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 342.48 24.62 13.913 7.19e-08 ***
## mpd.obs.z 74.88 62.79 1.193 0.261
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 84.48 on 10 degrees of freedom
## Multiple R-squared: 0.1245, Adjusted R-squared: 0.03699
## F-statistic: 1.422 on 1 and 10 DF, p-value: 0.2605
ggplot(df_2, aes(y = ntaxa, x = mpd.obs.z, fill = fraction)) +
geom_vline(xintercept = 0, linetype = "dashed", size = 1.5) +
geom_point(size = 3, shape = 21) + ylab("Randomized Richness") +
xlab("Standardized Effect Size \n Unweighted Mean Pairwise Dist") +
scale_fill_manual(values = fraction_colors) +
scale_x_continuous(limits = c(-1,1)) +
theme(legend.title = element_blank(), legend.position = c(0.15, 0.9))
